Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp187
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp45
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp36
-rw-r--r--contrib/llvm/lib/Analysis/AliasDebugger.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp105
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp67
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp459
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp27
-rw-r--r--contrib/llvm/lib/Analysis/CaptureTracking.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp298
-rw-r--r--contrib/llvm/lib/Analysis/DIBuilder.cpp801
-rw-r--r--contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp129
-rw-r--r--contrib/llvm/lib/Analysis/DebugInfo.cpp244
-rw-r--r--contrib/llvm/lib/Analysis/DomPrinter.cpp50
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp137
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraph.cpp24
-rw-r--r--contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp77
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp486
-rw-r--r--contrib/llvm/lib/Analysis/InstCount.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp1904
-rw-r--r--contrib/llvm/lib/Analysis/IntervalPartition.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp844
-rw-r--r--contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp20
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp110
-rw-r--r--contrib/llvm/lib/Analysis/LiveValues.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp22
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/MemDepPrinter.cpp167
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp373
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp88
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp154
-rw-r--r--contrib/llvm/lib/Analysis/PathNumbering.cpp525
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileInfo.cpp434
-rw-r--r--contrib/llvm/lib/Analysis/PathProfileVerifier.cpp207
-rw-r--r--contrib/llvm/lib/Analysis/PointerTracking.cpp316
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp10
-rw-r--r--contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfo.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp11
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp168
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp275
-rw-r--r--contrib/llvm/lib/Analysis/RegionPrinter.cpp36
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp1170
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp41
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp51
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp232
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp369
-rw-r--r--contrib/llvm/lib/Archive/Archive.cpp54
-rw-r--r--contrib/llvm/lib/Archive/ArchiveInternals.h2
-rw-r--r--contrib/llvm/lib/Archive/ArchiveWriter.cpp117
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp21
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.h4
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp151
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h8
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h3
-rw-r--r--contrib/llvm/lib/AsmParser/Parser.cpp10
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp181
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h5
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp68
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.h54
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp64
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp122
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp138
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp681
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h139
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp338
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h155
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfTableException.cpp349
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp99
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h12
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/ELF.h2
-rw-r--r--contrib/llvm/lib/CodeGen/ELFWriter.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp86
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp44
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp247
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp287
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp43
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp711
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.h63
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp312
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp357
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp315
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.h258
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp129
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.h135
-rw-r--r--contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp20
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp42
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp79
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp162
-rw-r--r--contrib/llvm/lib/CodeGen/MachineDominators.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp69
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp171
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp506
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp76
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp64
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp312
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp377
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/Graph.h425
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h246
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h616
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h460
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/Math.h288
-rw-r--r--contrib/llvm/lib/CodeGen/PBQP/Solution.h89
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp143
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.h115
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.h25
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp131
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp74
-rw-r--r--contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp43
-rw-r--r--contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp62
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.h4
-rw-r--r--contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.h181
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp523
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp68
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp1285
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp109
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp994
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/RenderMachineFunction.h4
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp137
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h10
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp (renamed from contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp)137
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp1211
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp84
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp141
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp1056
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp33
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp426
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp49
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h34
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp66
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp325
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp51
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp1969
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp333
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h35
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp684
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp850
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h14
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp791
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp669
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp247
-rw-r--r--contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h13
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp446
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp33
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.cpp330
-rw-r--r--contrib/llvm/lib/CodeGen/SpillPlacement.h108
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.cpp316
-rw-r--r--contrib/llvm/lib/CodeGen/Spiller.h12
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp1491
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h419
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/Splitter.h4
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp1694
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp50
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp253
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp54
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp165
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.h33
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp896
-rw-r--r--contrib/llvm/lib/CompilerDriver/Action.cpp26
-rw-r--r--contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp66
-rw-r--r--contrib/llvm/lib/CompilerDriver/Main.cpp9
-rw-r--r--contrib/llvm/lib/CompilerDriver/Tool.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp378
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp23
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp11
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/JIT/TargetSelect.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp92
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h68
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/TargetSelect.cpp91
-rw-r--r--contrib/llvm/lib/Linker/LinkItems.cpp13
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp100
-rw-r--r--contrib/llvm/lib/Linker/Linker.cpp18
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp1858
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp9
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp382
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp794
-rw-r--r--contrib/llvm/lib/MC/MCCodeEmitter.cpp12
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp117
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h8
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDInst.h2
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDOperand.h2
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/EDToken.h2
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp793
-rw-r--r--contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp23
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp350
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp284
-rw-r--r--contrib/llvm/lib/MC/MCLoggingStreamer.cpp60
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp355
-rw-r--r--contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp22
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp31
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp187
-rw-r--r--contrib/llvm/lib/MC/MCObjectWriter.cpp65
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp182
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp889
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp144
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp460
-rw-r--r--contrib/llvm/lib/MC/MCPureStreamer.cpp234
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp8
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp121
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp62
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp218
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp13
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp935
-rw-r--r--contrib/llvm/lib/MC/TargetAsmBackend.cpp25
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp371
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp180
-rw-r--r--contrib/llvm/lib/Object/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp375
-rw-r--r--contrib/llvm/lib/Object/ELFObjectFile.cpp686
-rw-r--r--contrib/llvm/lib/Object/MachOObject.cpp342
-rw-r--r--contrib/llvm/lib/Object/Makefile14
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp71
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp28
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp234
-rw-r--r--contrib/llvm/lib/Support/Allocator.cpp10
-rw-r--r--contrib/llvm/lib/Support/Atomic.cpp (renamed from contrib/llvm/lib/System/Atomic.cpp)2
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp84
-rw-r--r--contrib/llvm/lib/Support/ConstantRange.cpp94
-rw-r--r--contrib/llvm/lib/Support/CrashRecoveryContext.cpp30
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp2
-rw-r--r--contrib/llvm/lib/Support/Disassembler.cpp (renamed from contrib/llvm/lib/System/Disassembler.cpp)2
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp4
-rw-r--r--contrib/llvm/lib/Support/DynamicLibrary.cpp (renamed from contrib/llvm/lib/System/DynamicLibrary.cpp)17
-rw-r--r--contrib/llvm/lib/Support/Errno.cpp (renamed from contrib/llvm/lib/System/Errno.cpp)4
-rw-r--r--contrib/llvm/lib/Support/ErrorHandling.cpp11
-rw-r--r--contrib/llvm/lib/Support/FileUtilities.cpp26
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp29
-rw-r--r--contrib/llvm/lib/Support/FormattedStream.cpp1
-rw-r--r--contrib/llvm/lib/Support/GraphWriter.cpp48
-rw-r--r--contrib/llvm/lib/Support/Host.cpp (renamed from contrib/llvm/lib/System/Host.cpp)56
-rw-r--r--contrib/llvm/lib/Support/IncludeFile.cpp (renamed from contrib/llvm/lib/System/IncludeFile.cpp)4
-rw-r--r--contrib/llvm/lib/Support/IntEqClasses.cpp70
-rw-r--r--contrib/llvm/lib/Support/IntervalMap.cpp161
-rw-r--r--contrib/llvm/lib/Support/ManagedStatic.cpp2
-rw-r--r--contrib/llvm/lib/Support/Memory.cpp (renamed from contrib/llvm/lib/System/Memory.cpp)8
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp113
-rw-r--r--contrib/llvm/lib/Support/Mutex.cpp (renamed from contrib/llvm/lib/System/Mutex.cpp)6
-rw-r--r--contrib/llvm/lib/Support/Path.cpp (renamed from contrib/llvm/lib/System/Path.cpp)67
-rw-r--r--contrib/llvm/lib/Support/PathV2.cpp774
-rw-r--r--contrib/llvm/lib/Support/PluginLoader.cpp4
-rw-r--r--contrib/llvm/lib/Support/PrettyStackTrace.cpp17
-rw-r--r--contrib/llvm/lib/Support/Process.cpp (renamed from contrib/llvm/lib/System/Process.cpp)4
-rw-r--r--contrib/llvm/lib/Support/Program.cpp (renamed from contrib/llvm/lib/System/Program.cpp)6
-rw-r--r--contrib/llvm/lib/Support/RWMutex.cpp (renamed from contrib/llvm/lib/System/RWMutex.cpp)4
-rw-r--r--contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp (renamed from contrib/llvm/lib/System/SearchForAddressOfSpecialSymbol.cpp)11
-rw-r--r--contrib/llvm/lib/Support/Signals.cpp (renamed from contrib/llvm/lib/System/Signals.cpp)4
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp31
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp2
-rw-r--r--contrib/llvm/lib/Support/StringMap.cpp2
-rw-r--r--contrib/llvm/lib/Support/StringRef.cpp73
-rw-r--r--contrib/llvm/lib/Support/SystemUtils.cpp40
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp2
-rw-r--r--contrib/llvm/lib/Support/ThreadLocal.cpp (renamed from contrib/llvm/lib/System/ThreadLocal.cpp)5
-rw-r--r--contrib/llvm/lib/Support/Threading.cpp (renamed from contrib/llvm/lib/System/Threading.cpp)66
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp (renamed from contrib/llvm/lib/System/TimeValue.cpp)5
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp4
-rw-r--r--contrib/llvm/lib/Support/ToolOutputFile.cpp43
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp106
-rw-r--r--contrib/llvm/lib/Support/Twine.cpp34
-rw-r--r--contrib/llvm/lib/Support/Unix/Host.inc (renamed from contrib/llvm/lib/System/Unix/Host.inc)11
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc (renamed from contrib/llvm/lib/System/Unix/Memory.inc)12
-rw-r--r--contrib/llvm/lib/Support/Unix/Mutex.inc (renamed from contrib/llvm/lib/System/Unix/Mutex.inc)10
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc (renamed from contrib/llvm/lib/System/Unix/Path.inc)150
-rw-r--r--contrib/llvm/lib/Support/Unix/PathV2.inc507
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc (renamed from contrib/llvm/lib/System/Unix/Process.inc)26
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc (renamed from contrib/llvm/lib/System/Unix/Program.inc)68
-rw-r--r--contrib/llvm/lib/Support/Unix/README.txt16
-rw-r--r--contrib/llvm/lib/Support/Unix/RWMutex.inc (renamed from contrib/llvm/lib/System/Unix/RWMutex.inc)6
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc (renamed from contrib/llvm/lib/System/Unix/Signals.inc)24
-rw-r--r--contrib/llvm/lib/Support/Unix/ThreadLocal.inc (renamed from contrib/llvm/lib/System/Unix/ThreadLocal.inc)6
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc (renamed from contrib/llvm/lib/System/Unix/TimeValue.inc)12
-rw-r--r--contrib/llvm/lib/Support/Unix/Unix.h (renamed from contrib/llvm/lib/System/Unix/Unix.h)4
-rw-r--r--contrib/llvm/lib/Support/Unix/system_error.inc34
-rw-r--r--contrib/llvm/lib/Support/Valgrind.cpp (renamed from contrib/llvm/lib/System/Valgrind.cpp)2
-rw-r--r--contrib/llvm/lib/Support/Windows/DynamicLibrary.inc (renamed from contrib/llvm/lib/System/Win32/DynamicLibrary.inc)104
-rw-r--r--contrib/llvm/lib/Support/Windows/Host.inc (renamed from contrib/llvm/lib/System/Win32/Host.inc)4
-rw-r--r--contrib/llvm/lib/Support/Windows/Memory.inc (renamed from contrib/llvm/lib/System/Win32/Memory.inc)12
-rw-r--r--contrib/llvm/lib/Support/Windows/Mutex.inc (renamed from contrib/llvm/lib/System/Win32/Mutex.inc)16
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc (renamed from contrib/llvm/lib/System/Win32/Path.inc)129
-rw-r--r--contrib/llvm/lib/Support/Windows/PathV2.inc750
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc (renamed from contrib/llvm/lib/System/Win32/Process.inc)17
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc (renamed from contrib/llvm/lib/System/Win32/Program.inc)22
-rw-r--r--contrib/llvm/lib/Support/Windows/RWMutex.inc (renamed from contrib/llvm/lib/System/Win32/RWMutex.inc)10
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc (renamed from contrib/llvm/lib/System/Win32/Signals.inc)172
-rw-r--r--contrib/llvm/lib/Support/Windows/ThreadLocal.inc (renamed from contrib/llvm/lib/System/Win32/ThreadLocal.inc)11
-rw-r--r--contrib/llvm/lib/Support/Windows/TimeValue.inc (renamed from contrib/llvm/lib/System/Win32/TimeValue.inc)6
-rw-r--r--contrib/llvm/lib/Support/Windows/Windows.h120
-rw-r--r--contrib/llvm/lib/Support/Windows/explicit_symbols.inc66
-rw-r--r--contrib/llvm/lib/Support/Windows/system_error.inc142
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp103
-rw-r--r--contrib/llvm/lib/Support/regexec.c5
-rw-r--r--contrib/llvm/lib/Support/system_error.cpp130
-rw-r--r--contrib/llvm/lib/System/Alarm.cpp33
-rw-r--r--contrib/llvm/lib/System/Unix/Alarm.inc72
-rw-r--r--contrib/llvm/lib/System/Win32/Alarm.inc43
-rw-r--r--contrib/llvm/lib/System/Win32/Win32.h57
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h114
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAddressingModes.h12
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp512
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp2225
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h112
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInfo.h249
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp1305
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h171
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp948
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h65
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h160
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td29
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp368
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp27
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp26
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h43
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp83
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp1227
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp1670
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFixupKinds.h97
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameInfo.h32
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp1021
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h74
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp69
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp121
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h54
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp1823
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp2278
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h88
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td1191
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td3554
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td3650
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td1661
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td2725
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td1146
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMJITInfo.h2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp519
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp1230
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCExpr.h73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp147
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h60
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h13122
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td90
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td140
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA8.td862
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleA9.td1799
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleV6.td130
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp119
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp62
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h36
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp19
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h11
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp192
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp1530
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp49
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp259
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h298
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp (renamed from contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp)457
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h (renamed from contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h)69
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp321
-rw-r--r--contrib/llvm/lib/Target/ARM/NEONPreAllocPass.cpp406
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp352
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h52
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp84
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h17
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp332
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.cpp53
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.h40
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp133
-rw-r--r--contrib/llvm/lib/Target/Alpha/Alpha.h7
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp)2
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaCodeEmitter.cpp222
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp143
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h43
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp19
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp87
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h5
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td6
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaJITInfo.cpp310
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaJITInfo.h53
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp152
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaSchedule.td4
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp10
-rw-r--r--contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h20
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp)0
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp124
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h46
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp6
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp61
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h6
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp106
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h8
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td20
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h17
-rw-r--r--contrib/llvm/lib/Target/CBackend/CBackend.cpp337
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU.h1
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td79
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp)37
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp276
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h (renamed from contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.h)35
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp223
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp787
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h23
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td396
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUMCAsmInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNodes.td18
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp153
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUOperands.td18
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp264
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h16
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSchedule.td8
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp21
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp13
-rw-r--r--contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h15
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp37
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp127
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp568
-rw-r--r--contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp647
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h55
-rw-r--r--contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp69
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h43
-rw-r--r--contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.h8
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlaze.td41
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp163
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp)156
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td14
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp191
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h58
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp450
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp87
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp720
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h46
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td253
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td326
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td272
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp179
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h166
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td927
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td6
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp223
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp166
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h (renamed from contrib/llvm/lib/Target/ARM/ARMMCInstLower.h)30
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h86
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp343
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h20
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td140
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h47
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td4
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp66
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h33
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp9
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h7
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp (renamed from contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp)3
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h (renamed from contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h)0
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430.td1
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp (renamed from contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp)2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp223
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h53
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp17
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp22
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h9
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td16
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp (renamed from contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp)0
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h (renamed from contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h)0
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp170
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp14
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h12
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp10
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td30
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp)25
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp13
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp314
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.h48
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp28
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp620
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h18
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td355
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp287
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp20
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp29
-rw-r--r--contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp512
-rw-r--r--contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h88
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16.h134
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16.td40
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16ABINames.h399
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.cpp490
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.h156
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp50
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.h60
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.cpp2000
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.h253
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16InstrFormats.td117
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.cpp224
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.h76
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.td540
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.cpp59
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.h35
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16MachineFunctionInfo.h52
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16MemSelOpt.cpp254
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp299
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h83
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp182
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h60
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.cpp84
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.td33
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Section.cpp104
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Section.h99
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Subtarget.cpp27
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16Subtarget.h44
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.cpp55
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.h70
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.cpp384
-rw-r--r--contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.h168
-rw-r--r--contrib/llvm/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/PTX/CMakeLists.txt26
-rw-r--r--contrib/llvm/lib/Target/PTX/Makefile26
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h49
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td54
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp347
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.h43
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp151
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp210
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h67
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp87
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.h75
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td257
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp542
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp96
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h79
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp (renamed from contrib/llvm/lib/Target/TargetFrameInfo.cpp)12
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h63
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td102
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp23
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h32
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp60
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h60
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/Makefile15
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp292
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h69
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h62
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp119
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp)452
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp253
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h45
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp971
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h (renamed from contrib/llvm/lib/Target/PowerPC/PPCFrameInfo.h)50
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h20
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp210
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp731
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td57
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp81
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td177
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCAsmInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp195
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp172
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp975
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td13
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h18
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp230
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp)6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td10
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp80
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h41
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp18
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp721
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp195
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h11
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td221
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h11
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp53
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td3
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h15
-rw-r--r--contrib/llvm/lib/Target/SubtargetFeature.cpp3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp)6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp18
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp150
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td56
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMCAsmInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp214
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h12
-rw-r--r--contrib/llvm/lib/Target/Target.cpp15
-rw-r--r--contrib/llvm/lib/Target/TargetAsmInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/TargetData.cpp58
-rw-r--r--contrib/llvm/lib/Target/TargetELFWriterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/TargetFrameLowering.cpp53
-rw-r--r--contrib/llvm/lib/Target/TargetInstrInfo.cpp93
-rw-r--r--contrib/llvm/lib/Target/TargetLibraryInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp4
-rw-r--r--contrib/llvm/lib/Target/TargetRegisterInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp9
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp437
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp15
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h2
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c31
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h4
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h3
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt8
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp)2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h)0
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.cpp)2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.h)0
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp)3
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h (renamed from contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h)0
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt6
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/Makefile15
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp (renamed from contrib/llvm/lib/Target/X86/X86ShuffleDecode.h)53
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h69
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h10
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td28
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmBackend.cpp270
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp97
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td67
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp21
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h19
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp300
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupKinds.h16
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp129
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp994
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h65
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp200
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp3146
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h243
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr3DNow.td77
-rw-r--r--contrib/llvm/lib/Target/X86/X86Instr64bit.td2250
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td1125
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h37
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td104
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td1626
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td294
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrExtension.td172
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td82
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td107
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp448
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h84
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td4842
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td607
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td571
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td746
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td390
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrVMX.td54
-rw-r--r--contrib/llvm/lib/Target/X86/X86JITInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp15
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp149
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp117
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp32
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp955
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h17
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td100
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp52
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h9
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h36
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp55
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h75
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp (renamed from contrib/llvm/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp)0
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreCallingConv.td3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameInfo.h34
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp387
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h59
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp21
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp172
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h1
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp66
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h9
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td76
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp | 284
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h | 11
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td | 4
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp | 2
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h | 8
-rw-r--r-- contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp | 49
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 117
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 86
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp | 76
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 30
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 141
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 830
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/IPO.cpp | 38
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp | 11
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 15
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 56
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 4
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp | 17
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 646
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp | 8
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp | 216
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 11
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 24
-rw-r--r-- contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp | 11
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombine.h | 28
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 350
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 597
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 288
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 35
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 772
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 11
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 315
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 79
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 294
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 116
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 100
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 272
-rw-r--r-- contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 604
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp | 32
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 17
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp | 1423
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp | 22
-rw-r--r-- contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h | 7
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp | 11
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 369
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 86
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/DCE.cpp | 12
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 847
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 470
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 813
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 49
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp | 998
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LICM.cpp | 324
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 26
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 594
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp | 1270
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp | 170
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp | 491
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 200
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 57
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 60
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp | 146
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 729
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 38
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp | 12
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 39
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 55
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 1155
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 6
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 342
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/Sink.cpp | 20
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp | 18
-rw-r--r-- contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp | 139
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp | 28
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 157
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 56
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 54
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp | 40
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp | 45
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CloneModule.cpp | 8
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp | 173
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 12
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Local.cpp | 148
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 69
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 38
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 27
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 9
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp | 17
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 209
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 171
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2003
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp | 94
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 2
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/Utils.cpp | 37
-rw-r--r-- contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 178
-rw-r--r-- contrib/llvm/lib/VMCore/AsmWriter.cpp | 40
-rw-r--r-- contrib/llvm/lib/VMCore/Attributes.cpp | 33
-rw-r--r-- contrib/llvm/lib/VMCore/AutoUpgrade.cpp | 569
-rw-r--r-- contrib/llvm/lib/VMCore/BasicBlock.cpp | 9
-rw-r--r-- contrib/llvm/lib/VMCore/ConstantFold.cpp | 159
-rw-r--r-- contrib/llvm/lib/VMCore/ConstantFold.h | 2
-rw-r--r-- contrib/llvm/lib/VMCore/Constants.cpp | 382
-rw-r--r-- contrib/llvm/lib/VMCore/ConstantsContext.h | 30
-rw-r--r-- contrib/llvm/lib/VMCore/Core.cpp | 234
-rw-r--r-- contrib/llvm/lib/VMCore/Dominators.cpp | 275
-rw-r--r-- contrib/llvm/lib/VMCore/Function.cpp | 21
-rw-r--r-- contrib/llvm/lib/VMCore/Globals.cpp | 49
-rw-r--r-- contrib/llvm/lib/VMCore/IRBuilder.cpp | 81
-rw-r--r-- contrib/llvm/lib/VMCore/InlineAsm.cpp | 79
-rw-r--r-- contrib/llvm/lib/VMCore/Instruction.cpp | 36
-rw-r--r-- contrib/llvm/lib/VMCore/Instructions.cpp | 212
-rw-r--r-- contrib/llvm/lib/VMCore/LLVMContext.cpp | 42
-rw-r--r-- contrib/llvm/lib/VMCore/LLVMContextImpl.cpp | 13
-rw-r--r-- contrib/llvm/lib/VMCore/LLVMContextImpl.h | 8
-rw-r--r-- contrib/llvm/lib/VMCore/LeakDetector.cpp | 4
-rw-r--r-- contrib/llvm/lib/VMCore/Metadata.cpp | 13
-rw-r--r-- contrib/llvm/lib/VMCore/Module.cpp | 2
-rw-r--r-- contrib/llvm/lib/VMCore/Pass.cpp | 1
-rw-r--r-- contrib/llvm/lib/VMCore/PassManager.cpp | 128
-rw-r--r-- contrib/llvm/lib/VMCore/PassRegistry.cpp | 173
-rw-r--r-- contrib/llvm/lib/VMCore/PrintModulePass.cpp | 4
-rw-r--r-- contrib/llvm/lib/VMCore/Type.cpp | 30
-rw-r--r-- contrib/llvm/lib/VMCore/TypesContext.h | 2
-rw-r--r-- contrib/llvm/lib/VMCore/Use.cpp | 122
-rw-r--r-- contrib/llvm/lib/VMCore/User.cpp | 81
-rw-r--r-- contrib/llvm/lib/VMCore/Value.cpp | 97
-rw-r--r-- contrib/llvm/lib/VMCore/Verifier.cpp | 32
955 files changed, 115610 insertions, 73499 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 1f2528f..be02ddb 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -30,12 +30,13 @@
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
// Register the AliasAnalysis interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis");
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
char AliasAnalysis::ID = 0;
//===----------------------------------------------------------------------===//
@@ -43,15 +44,15 @@ char AliasAnalysis::ID = 0;
//===----------------------------------------------------------------------===//
AliasAnalysis::AliasResult
-AliasAnalysis::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->alias(V1, V1Size, V2, V2Size);
+ return AA->alias(LocA, LocB);
}
-bool AliasAnalysis::pointsToConstantMemory(const Value *P) {
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->pointsToConstantMemory(P);
+ return AA->pointsToConstantMemory(Loc, OrLocal);
}
void AliasAnalysis::deleteValue(Value *V) {
@@ -64,49 +65,55 @@ void AliasAnalysis::copyValue(Value *From, Value *To) {
AA->copyValue(From, To);
}
+void AliasAnalysis::addEscapingUse(Use &U) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ AA->addEscapingUse(U);
+}
+
+
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
- // Don't assert AA because BasicAA calls us in order to make use of the
- // logic here.
+ const Location &Loc) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
ModRefBehavior MRB = getModRefBehavior(CS);
if (MRB == DoesNotAccessMemory)
return NoModRef;
ModRefResult Mask = ModRef;
- if (MRB == OnlyReadsMemory)
+ if (onlyReadsMemory(MRB))
Mask = Ref;
- else if (MRB == AliasAnalysis::AccessesArguments) {
+
+ if (onlyAccessesArgPointees(MRB)) {
bool doesAlias = false;
- for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI)
- if (!isNoAlias(*AI, ~0U, P, Size)) {
- doesAlias = true;
- break;
- }
+ if (doesAccessArgPointees(MRB))
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (!isNoAlias(Location(*AI), Loc)) {
+ doesAlias = true;
+ break;
+ }
if (!doesAlias)
return NoModRef;
}
- // If P points to a constant memory location, the call definitely could not
+ // If Loc is a constant memory location, the call definitely could not
// modify the memory location.
- if ((Mask & Mod) && pointsToConstantMemory(P))
+ if ((Mask & Mod) && pointsToConstantMemory(Loc))
Mask = ModRefResult(Mask & ~Mod);
- // If this is BasicAA, don't forward.
+ // If this is the end of the chain, don't forward.
if (!AA) return Mask;
// Otherwise, fall back to the next AA in the chain. But we can merge
// in any mask we've managed to compute.
- return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);
+ return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
}
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
- // Don't assert AA because BasicAA calls us in order to make use of the
- // logic here.
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
// If CS1 or CS2 are readnone, they don't interact.
ModRefBehavior CS1B = getModRefBehavior(CS1);
@@ -116,45 +123,47 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
if (CS2B == DoesNotAccessMemory) return NoModRef;
// If they both only read from memory, there is no dependence.
- if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+ if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
return NoModRef;
AliasAnalysis::ModRefResult Mask = ModRef;
// If CS1 only reads memory, the only dependence on CS2 can be
// from CS1 reading memory written by CS2.
- if (CS1B == OnlyReadsMemory)
+ if (onlyReadsMemory(CS1B))
Mask = ModRefResult(Mask & Ref);
// If CS2 only access memory through arguments, accumulate the mod/ref
// information from CS1's references to the memory referenced by
// CS2's arguments.
- if (CS2B == AccessesArguments) {
+ if (onlyAccessesArgPointees(CS2B)) {
AliasAnalysis::ModRefResult R = NoModRef;
- for (ImmutableCallSite::arg_iterator
- I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
- R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
- if (R == Mask)
- break;
- }
+ if (doesAccessArgPointees(CS2B))
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+ if (R == Mask)
+ break;
+ }
return R;
}
// If CS1 only accesses memory through arguments, check if CS2 references
// any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (CS1B == AccessesArguments) {
+ if (onlyAccessesArgPointees(CS1B)) {
AliasAnalysis::ModRefResult R = NoModRef;
- for (ImmutableCallSite::arg_iterator
- I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
- if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
- R = Mask;
- break;
- }
+ if (doesAccessArgPointees(CS1B))
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+ if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+ R = Mask;
+ break;
+ }
if (R == NoModRef)
return R;
}
- // If this is BasicAA, don't forward.
+ // If this is the end of the chain, don't forward.
if (!AA) return Mask;
// Otherwise, fall back to the next AA in the chain. But we can merge
@@ -164,8 +173,7 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
AliasAnalysis::ModRefBehavior
AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
- // Don't assert AA because BasicAA calls us in order to make use of the
- // logic here.
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
ModRefBehavior Min = UnknownModRefBehavior;
@@ -174,12 +182,12 @@ AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
if (const Function *F = CS.getCalledFunction())
Min = getModRefBehavior(F);
- // If this is BasicAA, don't forward.
+ // If this is the end of the chain, don't forward.
if (!AA) return Min;
// Otherwise, fall back to the next AA in the chain. But we can merge
// in any result we've managed to compute.
- return std::min(AA->getModRefBehavior(CS), Min);
+ return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
}
AliasAnalysis::ModRefBehavior
@@ -188,20 +196,66 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
return AA->getModRefBehavior(F);
}
-
//===----------------------------------------------------------------------===//
// AliasAnalysis non-virtual helper method implementation
//===----------------------------------------------------------------------===//
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ return Location(LI->getPointerOperand(),
+ getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ return Location(SI->getPointerOperand(),
+ getTypeStoreSize(SI->getValueOperand()->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+ return Location(VI->getPointerOperand(),
+ UnknownSize,
+ VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+ uint64_t Size = UnknownSize;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Size = C->getValue().getZExtValue();
+
+ // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // to both the source and the destination.
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+ return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// Be conservative in the face of volatile.
if (L->isVolatile())
return ModRef;
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size))
+ if (!alias(getLocation(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -209,20 +263,19 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
// Be conservative in the face of volatile.
if (S->isVolatile())
return ModRef;
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
- if (!alias(S->getOperand(1),
- getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
+ if (!alias(getLocation(S), Loc))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
// modified by this store.
- if (pointsToConstantMemory(P))
+ if (pointsToConstantMemory(Loc))
return NoModRef;
// Otherwise, a store just writes.
@@ -230,29 +283,21 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size)
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) {
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
- if (!alias(V->getOperand(0), UnknownSize, P, Size))
+ if (!alias(getLocation(V), Loc))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
// modified by this va_arg.
- if (pointsToConstantMemory(P))
+ if (pointsToConstantMemory(Loc))
return NoModRef;
// Otherwise, a va_arg reads and writes.
return ModRef;
}
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
-}
-
// AliasAnalysis destructor: DO NOT move this to the header file for
// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
// the AliasAnalysis.o file in the current .a file, causing alias analysis
@@ -277,16 +322,16 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// getTypeStoreSize - Return the TargetData store size for the given type,
/// if known, or a conservative value otherwise.
///
-unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) {
- return TD ? TD->getTypeStoreSize(Ty) : ~0u;
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+ return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
}
/// canBasicBlockModify - Return true if it is possible for execution of the
/// specified basic block to modify the value pointed to by Ptr.
///
bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
- const Value *Ptr, unsigned Size) {
- return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size);
+ const Location &Loc) {
+ return canInstructionRangeModify(BB.front(), BB.back(), Loc);
}
/// canInstructionRangeModify - Return true if it is possible for the execution
@@ -296,7 +341,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
///
bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
const Instruction &I2,
- const Value *Ptr, unsigned Size) {
+ const Location &Loc) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
BasicBlock::const_iterator I = &I1;
@@ -304,7 +349,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, Ptr, Size) & Mod)
+ if (getModRefInfo(I, Loc) & Mod)
return true;
return false;
}
@@ -336,9 +381,3 @@ bool llvm::isIdentifiedObject(const Value *V) {
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
-
-// Because of the way .a files work, we must force the BasicAA implementation to
-// be pulled in if the AliasAnalysis classes are pulled in. Otherwise we run
-// the risk of AliasAnalysis being used, but the default implementation not
-// being linked into the tool that uses it.
-DEFINING_FILE_FOR(AliasAnalysis)
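A minimal sketch (not part of the patch itself) of how a client would phrase a query against the Location-based interface introduced in the hunks above; the helper name mayConflict is hypothetical, everything else is taken from the signatures shown in this diff.

// Illustrative sketch only; assumes the Location-based interface shown above.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Instructions.h"

using namespace llvm;

// Hypothetical helper: may a load and a store touch overlapping memory?
static bool mayConflict(AliasAnalysis &AA, const LoadInst *LI,
                        const StoreInst *SI) {
  // getLocation() bundles the pointer, the accessed size, and any TBAA tag.
  AliasAnalysis::Location LoadLoc = AA.getLocation(LI);
  AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
  // alias() now takes two Locations rather than (Value*, unsigned) pairs.
  return AA.alias(LoadLoc, StoreLoc) != AliasAnalysis::NoAlias;
}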
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
index b178041..d947220 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -29,13 +29,14 @@ PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
namespace {
class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
- unsigned No, May, Must;
+ unsigned No, May, Partial, Must;
unsigned NoMR, JustRef, JustMod, MR;
Module *M;
public:
static char ID; // Class identification, replacement for typeinfo
AliasAnalysisCounter() : ModulePass(ID) {
- No = May = Must = 0;
+ initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+ No = May = Partial = Must = 0;
NoMR = JustRef = JustMod = MR = 0;
}
@@ -44,7 +45,7 @@ namespace {
<< Val*100/Sum << "%)\n";
}
~AliasAnalysisCounter() {
- unsigned AASum = No+May+Must;
+ unsigned AASum = No+May+Partial+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
errs() << "\n===== Alias Analysis Counter Report =====\n"
@@ -53,9 +54,12 @@ namespace {
if (AASum) {
printLine("no alias", No, AASum);
printLine("may alias", May, AASum);
+ printLine("partial alias", Partial, AASum);
printLine("must alias", Must, AASum);
errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/"
- << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n";
+ << May*100/AASum << "%/"
+ << Partial*100/AASum << "%/"
+ << Must*100/AASum<<"%\n\n";
}
errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n";
@@ -94,17 +98,16 @@ namespace {
}
// FIXME: We could count these too...
- bool pointsToConstantMemory(const Value *P) {
- return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P);
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
}
// Forwarding functions: just delegate to a real AA implementation, counting
// the number of responses...
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ AliasResult alias(const Location &LocA, const Location &LocB);
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size);
+ const Location &Loc);
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
@@ -114,32 +117,32 @@ namespace {
char AliasAnalysisCounter::ID = 0;
INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
- "Count Alias Analysis Query Responses", false, true, false);
+ "Count Alias Analysis Query Responses", false, true, false)
ModulePass *llvm::createAliasAnalysisCounterPass() {
return new AliasAnalysisCounter();
}
AliasAnalysis::AliasResult
-AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- AliasResult R = getAnalysis<AliasAnalysis>().alias(V1, V1Size, V2, V2Size);
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+ AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
const char *AliasString;
switch (R) {
default: llvm_unreachable("Unknown alias type!");
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
+ case PartialAlias: Partial++; AliasString = "Partial alias"; break;
case MustAlias: Must++; AliasString = "Must alias"; break;
}
if (PrintAll || (PrintAllFailures && R == MayAlias)) {
errs() << AliasString << ":\t";
- errs() << "[" << V1Size << "B] ";
- WriteAsOperand(errs(), V1, true, M);
+ errs() << "[" << LocA.Size << "B] ";
+ WriteAsOperand(errs(), LocA.Ptr, true, M);
errs() << ", ";
- errs() << "[" << V2Size << "B] ";
- WriteAsOperand(errs(), V2, true, M);
+ errs() << "[" << LocB.Size << "B] ";
+ WriteAsOperand(errs(), LocB.Ptr, true, M);
errs() << "\n";
}
@@ -148,8 +151,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
AliasAnalysis::ModRefResult
AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
- ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
+ const Location &Loc) {
+ ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
const char *MRString;
switch (R) {
@@ -162,8 +165,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
if (PrintAll || (PrintAllFailures && R == ModRef)) {
errs() << MRString << ": Ptr: ";
- errs() << "[" << Size << "B] ";
- WriteAsOperand(errs(), P, true, M);
+ errs() << "[" << Loc.Size << "B] ";
+ WriteAsOperand(errs(), Loc.Ptr, true, M);
errs() << "\t<->" << *CS.getInstruction() << '\n';
}
return R;
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index ce363cb..1afc1b7 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -36,6 +36,7 @@ static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
@@ -45,12 +46,14 @@ static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
namespace {
class AAEval : public FunctionPass {
- unsigned NoAlias, MayAlias, MustAlias;
+ unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
unsigned NoModRef, Mod, Ref, ModRef;
public:
static char ID; // Pass identification, replacement for typeid
- AAEval() : FunctionPass(ID) {}
+ AAEval() : FunctionPass(ID) {
+ initializeAAEvalPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -58,11 +61,12 @@ namespace {
}
bool doInitialization(Module &M) {
- NoAlias = MayAlias = MustAlias = 0;
+ NoAlias = MayAlias = PartialAlias = MustAlias = 0;
NoModRef = Mod = Ref = ModRef = 0;
if (PrintAll) {
- PrintNoAlias = PrintMayAlias = PrintMustAlias = true;
+ PrintNoAlias = PrintMayAlias = true;
+ PrintPartialAlias = PrintMustAlias = true;
PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
}
return false;
@@ -74,8 +78,11 @@ namespace {
}
char AAEval::ID = 0;
-INITIALIZE_PASS(AAEval, "aa-eval",
- "Exhaustive Alias Analysis Precision Evaluator", false, true);
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true)
FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
@@ -155,7 +162,7 @@ bool AAEval::runOnFunction(Function &F) {
}
}
- if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
+ if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
PrintNoModRef || PrintMod || PrintRef || PrintModRef)
errs() << "Function: " << F.getName() << ": " << Pointers.size()
<< " pointers, " << CallSites.size() << " call sites\n";
@@ -163,12 +170,12 @@ bool AAEval::runOnFunction(Function &F) {
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- unsigned I1Size = ~0u;
+ uint64_t I1Size = AliasAnalysis::UnknownSize;
const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- unsigned I2Size = ~0u;
+ uint64_t I2Size = AliasAnalysis::UnknownSize;
const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
@@ -179,6 +186,10 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::MayAlias:
PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
++MayAlias; break;
+ case AliasAnalysis::PartialAlias:
+ PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+ F.getParent());
+ ++PartialAlias; break;
case AliasAnalysis::MustAlias:
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
++MustAlias; break;
@@ -195,7 +206,7 @@ bool AAEval::runOnFunction(Function &F) {
for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
V != Ve; ++V) {
- unsigned Size = ~0u;
+ uint64_t Size = AliasAnalysis::UnknownSize;
const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
@@ -250,7 +261,7 @@ static void PrintPercent(unsigned Num, unsigned Sum) {
}
bool AAEval::doFinalization(Module &M) {
- unsigned AliasSum = NoAlias + MayAlias + MustAlias;
+ unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
errs() << "===== Alias Analysis Evaluator Report =====\n";
if (AliasSum == 0) {
errs() << " Alias Analysis Evaluator Summary: No pointers!\n";
@@ -260,10 +271,13 @@ bool AAEval::doFinalization(Module &M) {
PrintPercent(NoAlias, AliasSum);
errs() << " " << MayAlias << " may alias responses ";
PrintPercent(MayAlias, AliasSum);
+ errs() << " " << PartialAlias << " partial alias responses ";
+ PrintPercent(PartialAlias, AliasSum);
errs() << " " << MustAlias << " must alias responses ";
PrintPercent(MustAlias, AliasSum);
errs() << " Alias Analysis Evaluator Pointer Alias Summary: "
<< NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+ << PartialAlias*100/AliasSum << "%/"
<< MustAlias*100/AliasSum << "%\n";
}
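A small sketch, assuming the AliasResult enumeration now carries PartialAlias as the evaluator changes above indicate; the function below is illustrative only and not part of the patch.

// Illustrative only: switches over AliasResult now need a PartialAlias case.
#include "llvm/Analysis/AliasAnalysis.h"

static const char *aliasResultName(llvm::AliasAnalysis::AliasResult R) {
  switch (R) {
  case llvm::AliasAnalysis::NoAlias:      return "NoAlias";
  case llvm::AliasAnalysis::MayAlias:     return "MayAlias";
  case llvm::AliasAnalysis::PartialAlias: return "PartialAlias";
  case llvm::AliasAnalysis::MustAlias:    return "MustAlias";
  }
  return "Unknown";
}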
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
index b9fe646..f15c051 100644
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -39,7 +39,9 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- AliasDebugger() : ModulePass(ID) {}
+ AliasDebugger() : ModulePass(ID) {
+ initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -92,17 +94,18 @@ namespace {
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before");
- assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+ AliasResult alias(const Location &LocA, const Location &LocB) {
+ assert(Vals.find(LocA.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ assert(Vals.find(LocB.Ptr) != Vals.end() &&
+ "Never seen value in AA before");
+ return AliasAnalysis::alias(LocA, LocB);
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
- assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::getModRefInfo(CS, P, Size);
+ const Location &Loc) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::getModRefInfo(CS, Loc);
}
ModRefResult getModRefInfo(ImmutableCallSite CS1,
@@ -110,9 +113,9 @@ namespace {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
- bool pointsToConstantMemory(const Value *P) {
- assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
- return AliasAnalysis::pointsToConstantMemory(P);
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
virtual void deleteValue(Value *V) {
@@ -129,7 +132,7 @@ namespace {
char AliasDebugger::ID = 0;
INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
- "AA use debugger", false, true, false);
+ "AA use debugger", false, true, false)
Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index e74543b..3a46976d 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
#include "llvm/Target/TargetData.h"
@@ -45,7 +46,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
PointerRec *R = AS.getSomePointer();
// If the pointers are not a must-alias pair, this set becomes a may alias.
- if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize())
+ if (AA.alias(AliasAnalysis::Location(L->getValue(),
+ L->getSize(),
+ L->getTBAAInfo()),
+ AliasAnalysis::Location(R->getValue(),
+ R->getSize(),
+ R->getTBAAInfo()))
!= AliasAnalysis::MustAlias)
AliasTy = MayAlias;
}
@@ -87,7 +93,8 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
}
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
- unsigned Size, bool KnownMustAlias) {
+ uint64_t Size, const MDNode *TBAAInfo,
+ bool KnownMustAlias) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
// Check to see if we have to downgrade to _may_ alias.
@@ -95,16 +102,18 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
if (PointerRec *P = getSomePointer()) {
AliasAnalysis &AA = AST.getAliasAnalysis();
AliasAnalysis::AliasResult Result =
- AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size);
- if (Result == AliasAnalysis::MayAlias)
+ AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+ P->getTBAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ if (Result != AliasAnalysis::MustAlias)
AliasTy = MayAlias;
else // First entry of must alias must have maximum size!
- P->updateSize(Size);
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo);
assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
}
Entry.setAliasSet(this);
- Entry.updateSize(Size);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
// Add it to the end of the list...
assert(*PtrListEnd == 0 && "End of list is not null?");
@@ -120,7 +129,7 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
return;
- else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
AliasTy = MayAlias;
AccessTy |= Refs;
return;
@@ -134,7 +143,8 @@ void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
/// aliasesPointer - Return true if the specified pointer "may" (or must)
/// alias one of the members in the set.
///
-bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
+bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo,
AliasAnalysis &AA) const {
if (AliasTy == MustAlias) {
assert(CallSites.empty() && "Illegal must alias set!");
@@ -143,19 +153,26 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
// SOME value in the set.
PointerRec *SomePtr = getSomePointer();
assert(SomePtr && "Empty must-alias set??");
- return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size);
+ return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
+ SomePtr->getSize(),
+ SomePtr->getTBAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo));
}
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(Ptr, Size, I.getPointer(), I.getSize()))
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ AliasAnalysis::Location(I.getPointer(), I.getSize(),
+ I.getTBAAInfo())))
return true;
// Check the call sites list and invoke list...
if (!CallSites.empty()) {
for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(CallSites[i],
+ AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::NoModRef)
return true;
}
@@ -198,10 +215,11 @@ void AliasSetTracker::clear() {
/// that may alias the pointer, merge them together and return the unified set.
///
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
- unsigned Size) {
+ uint64_t Size,
+ const MDNode *TBAAInfo) {
AliasSet *FoundSet = 0;
for (iterator I = begin(), E = end(); I != E; ++I) {
- if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue;
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
if (FoundSet == 0) { // If this is the first alias set ptr can go into.
FoundSet = I; // Remember it.
@@ -216,9 +234,10 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
/// containsPointer - Return true if the specified location is represented by
/// this alias set, false otherwise. This does not modify the AST object or
/// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
+bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+ const MDNode *TBAAInfo) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesPointer(Ptr, Size, AA))
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
return true;
return false;
}
@@ -244,33 +263,34 @@ AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
+ const MDNode *TBAAInfo,
bool *New) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
- Entry.updateSize(Size);
+ Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
- if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
// Add it to the alias set it aliases.
- AS->addPointer(*this, Entry, Size);
+ AS->addPointer(*this, Entry, Size, TBAAInfo);
return *AS;
}
if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size);
+ AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool NewPtr;
- addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr);
+ addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
return NewPtr;
}
@@ -279,6 +299,7 @@ bool AliasSetTracker::add(LoadInst *LI) {
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
+ LI->getMetadata(LLVMContext::MD_tbaa),
AliasSet::Refs, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -289,6 +310,7 @@ bool AliasSetTracker::add(StoreInst *SI) {
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
AA.getTypeStoreSize(Val->getType()),
+ SI->getMetadata(LLVMContext::MD_tbaa),
AliasSet::Mods, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
@@ -296,7 +318,9 @@ bool AliasSetTracker::add(StoreInst *SI) {
bool AliasSetTracker::add(VAArgInst *VAAI) {
bool NewPtr;
- addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr);
+ addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa),
+ AliasSet::ModRef, NewPtr);
return NewPtr;
}
@@ -358,6 +382,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ ASI.getTBAAInfo(),
(AliasSet::AccessType)AS.AccessTy, X);
if (AS.isVolatile()) NewAS.setVolatile();
}
@@ -393,31 +418,36 @@ void AliasSetTracker::remove(AliasSet &AS) {
AS.removeFromTracker(*this);
}
-bool AliasSetTracker::remove(Value *Ptr, unsigned Size) {
- AliasSet *AS = findAliasSetForPointer(Ptr, Size);
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
if (!AS) return false;
remove(*AS);
return true;
}
bool AliasSetTracker::remove(LoadInst *LI) {
- unsigned Size = AA.getTypeStoreSize(LI->getType());
- AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size);
+ uint64_t Size = AA.getTypeStoreSize(LI->getType());
+ const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
if (!AS) return false;
remove(*AS);
return true;
}
bool AliasSetTracker::remove(StoreInst *SI) {
- unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
- AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size);
+ uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+ const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
if (!AS) return false;
remove(*AS);
return true;
}
bool AliasSetTracker::remove(VAArgInst *VAAI) {
- AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0);
+ AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+ AliasAnalysis::UnknownSize,
+ VAAI->getMetadata(LLVMContext::MD_tbaa));
if (!AS) return false;
remove(*AS);
return true;
@@ -507,7 +537,9 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
// Add it to the alias set it aliases...
I = PointerMap.find(From);
AliasSet *AS = I->second->getAliasSet(*this);
- AS->addPointer(*this, Entry, I->second->getSize(), true);
+ AS->addPointer(*this, Entry, I->second->getSize(),
+ I->second->getTBAAInfo(),
+ true);
}
@@ -587,7 +619,9 @@ namespace {
AliasSetTracker *Tracker;
public:
static char ID; // Pass identification, replacement for typeid
- AliasSetPrinter() : FunctionPass(ID) {}
+ AliasSetPrinter() : FunctionPass(ID) {
+ initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -607,5 +641,8 @@ namespace {
}
char AliasSetPrinter::ID = 0;
-INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets",
- "Alias Set Printer", false, true);
+INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true)
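A minimal sketch of feeding a load into the widened AliasSetTracker interface above, which now takes a uint64_t size and a TBAA MDNode; the helper name trackLoad is hypothetical and the sketch is not part of the patch.

// Illustrative only; assumes the three-argument add() introduced above.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"

using namespace llvm;

static void trackLoad(AliasSetTracker &AST, AliasAnalysis &AA, LoadInst *LI) {
  uint64_t Size = AA.getTypeStoreSize(LI->getType());         // now uint64_t
  const MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); // may be null
  AST.add(LI->getPointerOperand(), Size, TBAA);               // Ptr, Size, TBAAInfo
}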
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 398dec7..1af1c35 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -8,22 +8,83 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Analysis.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Analysis/Verifier.h"
#include <cstring>
using namespace llvm;
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void llvm::initializeAnalysis(PassRegistry &Registry) {
+ initializeAliasAnalysisAnalysisGroup(Registry);
+ initializeAliasAnalysisCounterPass(Registry);
+ initializeAAEvalPass(Registry);
+ initializeAliasDebuggerPass(Registry);
+ initializeAliasSetPrinterPass(Registry);
+ initializeNoAAPass(Registry);
+ initializeBasicAliasAnalysisPass(Registry);
+ initializeCFGViewerPass(Registry);
+ initializeCFGPrinterPass(Registry);
+ initializeCFGOnlyViewerPass(Registry);
+ initializeCFGOnlyPrinterPass(Registry);
+ initializePrintDbgInfoPass(Registry);
+ initializeDominanceFrontierPass(Registry);
+ initializeDomViewerPass(Registry);
+ initializeDomPrinterPass(Registry);
+ initializeDomOnlyViewerPass(Registry);
+ initializePostDomViewerPass(Registry);
+ initializeDomOnlyPrinterPass(Registry);
+ initializePostDomPrinterPass(Registry);
+ initializePostDomOnlyViewerPass(Registry);
+ initializePostDomOnlyPrinterPass(Registry);
+ initializeIVUsersPass(Registry);
+ initializeInstCountPass(Registry);
+ initializeIntervalPartitionPass(Registry);
+ initializeLazyValueInfoPass(Registry);
+ initializeLibCallAliasAnalysisPass(Registry);
+ initializeLintPass(Registry);
+ initializeLiveValuesPass(Registry);
+ initializeLoopDependenceAnalysisPass(Registry);
+ initializeLoopInfoPass(Registry);
+ initializeMemDepPrinterPass(Registry);
+ initializeMemoryDependenceAnalysisPass(Registry);
+ initializeModuleDebugInfoPrinterPass(Registry);
+ initializePostDominatorTreePass(Registry);
+ initializePostDominanceFrontierPass(Registry);
+ initializeProfileEstimatorPassPass(Registry);
+ initializeNoProfileInfoPass(Registry);
+ initializeNoPathProfileInfoPass(Registry);
+ initializeProfileInfoAnalysisGroup(Registry);
+ initializePathProfileInfoAnalysisGroup(Registry);
+ initializeLoaderPassPass(Registry);
+ initializePathProfileLoaderPassPass(Registry);
+ initializeProfileVerifierPassPass(Registry);
+ initializePathProfileVerifierPass(Registry);
+ initializeRegionInfoPass(Registry);
+ initializeRegionViewerPass(Registry);
+ initializeRegionPrinterPass(Registry);
+ initializeRegionOnlyViewerPass(Registry);
+ initializeRegionOnlyPrinterPass(Registry);
+ initializeScalarEvolutionPass(Registry);
+ initializeScalarEvolutionAliasAnalysisPass(Registry);
+ initializeTypeBasedAliasAnalysisPass(Registry);
+}
+
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
+ initializeAnalysis(*unwrap(R));
+}
+
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
char **OutMessages) {
std::string Messages;
-
+
LLVMBool Result = verifyModule(*unwrap(M),
static_cast<VerifierFailureAction>(Action),
OutMessages? &Messages : 0);
-
+
if (OutMessages)
*OutMessages = strdup(Messages.c_str());
-
+
return Result;
}
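A usage sketch for the initialization entry point added above; it assumes the llvm-c headers of this revision expose LLVMGetGlobalPassRegistry, and the verifyWithAnalysis() wrapper is hypothetical, not part of the patch.

// Illustrative only: register the Analysis passes, then verify a module
// through the C bindings shown above.
#include "llvm-c/Analysis.h"
#include "llvm-c/Core.h"
#include <stdio.h>

static void verifyWithAnalysis(LLVMModuleRef M) {
  LLVMInitializeAnalysis(LLVMGetGlobalPassRegistry()); // new entry point above
  char *Msg = 0;
  if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Msg))
    fprintf(stderr, "verifier: %s\n", Msg);
  LLVMDisposeMessage(Msg); // message was strdup'd by the callee
}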
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 113c72b..f7bcd9e 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- BasicAliasAnalysis.cpp - Local Alias Analysis Impl -----------------===//
+//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the default implementation of the Alias Analysis interface
-// that simply implements a few identities (two different globals cannot alias,
-// etc), but otherwise does no analysis.
+// This file defines the primary stateless implementation of the
+// Alias Analysis interface that implements identities (two different
+// globals cannot alias, etc), but does no stateful analysis.
//
//===----------------------------------------------------------------------===//
@@ -22,10 +22,12 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -95,104 +97,54 @@ static bool isEscapeSource(const Value *V) {
return false;
}
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
-static bool isObjectSmallerThan(const Value *V, unsigned Size,
- const TargetData &TD) {
+/// getObjectSize - Return the size of the object specified by V, or
+/// UnknownSize if unknown.
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->hasDefinitiveInitializer())
+ return AliasAnalysis::UnknownSize;
AccessTy = GV->getType()->getElementType();
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
if (!AI->isArrayAllocation())
AccessTy = AI->getType()->getElementType();
else
- return false;
+ return AliasAnalysis::UnknownSize;
} else if (const CallInst* CI = extractMallocCall(V)) {
if (!isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
- return (C->getZExtValue() < Size);
- return false;
+ return C->getZExtValue();
+ return AliasAnalysis::UnknownSize;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
if (A->hasByValAttr())
AccessTy = cast<PointerType>(A->getType())->getElementType();
else
- return false;
+ return AliasAnalysis::UnknownSize;
} else {
- return false;
+ return AliasAnalysis::UnknownSize;
}
if (AccessTy->isSized())
- return TD.getTypeAllocSize(AccessTy) < Size;
- return false;
+ return TD.getTypeAllocSize(AccessTy);
+ return AliasAnalysis::UnknownSize;
}
-//===----------------------------------------------------------------------===//
-// NoAA Pass
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// NoAA - This class implements the -no-aa pass, which always returns "I
- /// don't know" for alias queries. NoAA is unlike other alias analysis
- /// implementations, in that it does not chain to a previous analysis. As
- /// such it doesn't follow many of the rules that other alias analyses must.
- ///
- struct NoAA : public ImmutablePass, public AliasAnalysis {
- static char ID; // Class identification, replacement for typeinfo
- NoAA() : ImmutablePass(ID) {}
- explicit NoAA(char &PID) : ImmutablePass(PID) { }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
-
- virtual void initializePass() {
- TD = getAnalysisIfAvailable<TargetData>();
- }
-
- virtual AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- return MayAlias;
- }
-
- virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
- return UnknownModRefBehavior;
- }
- virtual ModRefBehavior getModRefBehavior(const Function *F) {
- return UnknownModRefBehavior;
- }
-
- virtual bool pointsToConstantMemory(const Value *P) { return false; }
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
- return ModRef;
- }
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
- return ModRef;
- }
-
- virtual void deleteValue(Value *V) {}
- virtual void copyValue(Value *From, Value *To) {}
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const void *ID) {
- if (ID == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char NoAA::ID = 0;
-INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
- "No Alias Analysis (always returns 'may' alias)",
- true, true, false);
+/// isObjectSmallerThan - Return true if we can prove that the object specified
+/// by V is smaller than Size.
+static bool isObjectSmallerThan(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
+}
-ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
+/// isObjectSize - Return true if we can prove that the object specified
+/// by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size,
+ const TargetData &TD) {
+ uint64_t ObjectSize = getObjectSize(V, TD);
+ return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
+}
//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
@@ -272,14 +224,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Value *CastOp = cast<CastInst>(V)->getOperand(0);
unsigned OldWidth = Scale.getBitWidth();
unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
- Scale.trunc(SmallWidth);
- Offset.trunc(SmallWidth);
+ Scale = Scale.trunc(SmallWidth);
+ Offset = Offset.trunc(SmallWidth);
Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
TD, Depth+1);
- Scale.zext(OldWidth);
- Offset.zext(OldWidth);
+ Scale = Scale.zext(OldWidth);
+ Offset = Offset.zext(OldWidth);
return Result;
}
@@ -299,7 +251,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
/// the gep cannot necessarily be reconstructed from its decomposed form.
///
/// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through. When not, it just looks
+/// that GetUnderlyingObject can look through. When not, it just looks
/// through pointer casts.
///
static const Value *
@@ -328,6 +280,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = Op->getOperand(0);
continue;
}
+
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Get a DominatorTree and use it here.
+ if (const Value *Simplified =
+ SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+ V = Simplified;
+ continue;
+ }
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (GEPOp == 0)
@@ -386,8 +346,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- BaseOffs += IndexOffset.getZExtValue()*Scale;
- Scale *= IndexScale.getZExtValue();
+ BaseOffs += IndexOffset.getSExtValue()*Scale;
+ Scale *= IndexScale.getSExtValue();
// If we already had an occurrence of this index variable, merge this
@@ -407,7 +367,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// pointer size.
if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
Scale <<= ShiftBits;
- Scale >>= ShiftBits;
+ Scale = (int64_t)Scale >> ShiftBits;
}
if (Scale) {
@@ -485,25 +445,34 @@ static bool notDifferentParent(const Value *O1, const Value *O2) {
#endif
namespace {
- /// BasicAliasAnalysis - This is the default alias analysis implementation.
- /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
- /// derives from the NoAA class.
- struct BasicAliasAnalysis : public NoAA {
+ /// BasicAliasAnalysis - This is the primary alias analysis implementation.
+ struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : NoAA(ID) {}
+ BasicAliasAnalysis() : ImmutablePass(ID) {
+ initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ }
- virtual AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+ virtual AliasResult alias(const Location &LocA,
+ const Location &LocB) {
assert(Visited.empty() && "Visited must be cleared after use!");
- assert(notDifferentParent(V1, V2) &&
+ assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
- AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
+ LocB.Ptr, LocB.Size, LocB.TBAATag);
Visited.clear();
return Alias;
}
virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size);
+ const Location &Loc);
virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) {
@@ -513,7 +482,7 @@ namespace {
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
- virtual bool pointsToConstantMemory(const Value *P);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
/// getModRefBehavior - Return the behavior when calling the given
/// call site.
@@ -539,46 +508,102 @@ namespace {
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
- AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size,
+ AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2);
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
// instruction against another.
- AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
- const Value *V2, unsigned V2Size);
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
/// aliasSelect - Disambiguate a Select instruction against another value.
- AliasResult aliasSelect(const SelectInst *SI, unsigned SISize,
- const Value *V2, unsigned V2Size);
-
- AliasResult aliasCheck(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo);
+
+ AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAATag,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAATag);
};
} // End of anonymous namespace
// Register this pass...
char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
- "Basic Alias Analysis (default AA impl)",
- false, true, true);
+ "Basic Alias Analysis (stateless AA impl)",
+ false, true, false)
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
return new BasicAliasAnalysis();
}
+/// pointsToConstantMemory - Returns whether the given pointer value
+/// points to memory that is local to the function, with global constants being
+/// considered local to all functions.
+bool
+BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+ assert(Visited.empty() && "Visited must be cleared after use!");
+
+ unsigned MaxLookup = 8;
+ SmallVector<const Value *, 16> Worklist;
+ Worklist.push_back(Loc.Ptr);
+ do {
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+ if (!Visited.insert(V)) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+
+ // An alloca instruction defines local memory.
+ if (OrLocal && isa<AllocaInst>(V))
+ continue;
+
+ // A global constant counts as local memory for our purposes.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+ // global to be marked constant in some modules and non-constant in
+ // others. GV may even be a declaration, not a definition.
+ if (!GV->isConstant()) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ continue;
+ }
+
+ // If both select values point to local memory, then so does the select.
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // If all values incoming to a phi node point to local memory, then so does
+ // the phi.
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+ // Don't bother inspecting phi nodes with many operands.
+ if (PN->getNumIncomingValues() > MaxLookup) {
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+ }
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ Worklist.push_back(PN->getIncomingValue(i));
+ continue;
+ }
-/// pointsToConstantMemory - Chase pointers until we find a (constant
-/// global) or not.
-bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
- // Note: this doesn't require GV to be "ODR" because it isn't legal for a
- // global to be marked constant in some modules and non-constant in others.
- // GV may even be a declaration, not a definition.
- return GV->isConstant();
+ // Otherwise be conservative.
+ Visited.clear();
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- return NoAA::pointsToConstantMemory(P);
+ } while (!Worklist.empty() && --MaxLookup);
+
+ Visited.clear();
+ return Worklist.empty();
}
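
The new pointsToConstantMemory walks the pointer through selects and phis with a small worklist and a fixed lookup budget, answering conservatively whenever the budget runs out, a value is revisited, or something unhandled shows up. Below is a minimal standalone sketch of that bounded-worklist shape; the Node type and helper name are hypothetical, not the patch's code:

    #include <set>
    #include <vector>

    // Hypothetical value graph: a node is either known to be local memory,
    // forwards to other nodes (like a select or phi), or is unknown.
    struct Node {
      bool IsLocal = false;
      std::vector<const Node *> Forwards;   // empty for leaf nodes
    };

    // True only if every reachable leaf is local; gives up (conservative
    // "false") on revisits, unknown leaves, or once MaxLookup pops are
    // spent, mirroring the fallback calls in the code above.
    static bool allReachableAreLocal(const Node *Root, unsigned MaxLookup = 8) {
      std::vector<const Node *> Worklist;
      std::set<const Node *> Visited;
      Worklist.push_back(Root);
      do {
        const Node *N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second)
          return false;                          // revisit: be conservative
        if (N->IsLocal)
          continue;                              // this leaf is fine
        if (N->Forwards.empty())
          return false;                          // unknown leaf
        for (const Node *Succ : N->Forwards)     // select/phi: check inputs
          Worklist.push_back(Succ);
      } while (!Worklist.empty() && --MaxLookup);
      return Worklist.empty();                   // out of budget => false
    }

    int main() {
      Node Local;   Local.IsLocal = true;
      Node Select;  Select.Forwards = {&Local, &Local};
      return allReachableAreLocal(&Select) ? 0 : 1;
    }
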
/// getModRefBehavior - Return the behavior when calling the given call site.
@@ -596,22 +621,32 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
Min = OnlyReadsMemory;
// The AliasAnalysis base class has some smarts, let's use them.
- return std::min(AliasAnalysis::getModRefBehavior(CS), Min);
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
/// getModRefBehavior - Return the behavior when calling the given function.
/// For use when the call site is not known.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ // If the function declares it doesn't access memory, we can't do better.
if (F->doesNotAccessMemory())
- // Can't do better than this.
return DoesNotAccessMemory;
+
+ // For intrinsics, we can check the table.
+ if (unsigned iid = F->getIntrinsicID()) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ }
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the function declares it only reads memory, go with that.
if (F->onlyReadsMemory())
- return OnlyReadsMemory;
- if (unsigned id = F->getIntrinsicID())
- return getIntrinsicModRefBehavior(id);
+ Min = OnlyReadsMemory;
- return NoAA::getModRefBehavior(F);
+ // Otherwise be conservative.
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}
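
Replacing std::min with a bitwise AND here only makes sense if ModRefBehavior is encoded as a bitmask in the accompanying header change (not shown in this hunk), so that AND computes the intersection of what both sources of information allow. A toy encoding that illustrates the idea; the values are illustrative, not LLVM's:

    #include <cassert>

    // Illustrative encoding (not LLVM's actual enum): each set bit is a
    // capability the function might exercise.
    enum Behavior {
      DoesNotAccessMemory = 0,   // no bits: touches nothing
      OnlyReadsMemory     = 1,   // read bit only
      UnknownBehavior     = 3    // read and write bits
    };

    int main() {
      // One source of information says "unknown", another says "only
      // reads"; the bitwise AND keeps only what both allow: "only reads".
      int Combined = UnknownBehavior & OnlyReadsMemory;
      assert(Combined == OnlyReadsMemory);
      return Combined == OnlyReadsMemory ? 0 : 1;
    }
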
/// getModRefInfo - Check to see if the specified callsite can clobber the
@@ -620,13 +655,13 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
- assert(notDifferentParent(CS.getInstruction(), P) &&
+ const Location &Loc) {
+ assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = P->getUnderlyingObject();
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
- // If this is a tail call and P points to a stack location, we know that
+ // If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
// the current function not to the current function, and a tail callee
@@ -650,11 +685,11 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
continue;
- // If this is a no-capture pointer argument, see if we can tell that it
+ // If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) {
+ if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
PassedAsArg = true;
break;
}
@@ -664,6 +699,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return NoModRef;
}
+ ModRefResult Min = ModRef;
+
// Finally, handle specific knowledge of intrinsics.
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
if (II != 0)
@@ -671,15 +708,20 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
default: break;
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- unsigned Len = UnknownSize;
+ uint64_t Len = UnknownSize;
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
Value *Src = II->getArgOperand(1);
- if (isNoAlias(Dest, Len, P, Size)) {
- if (isNoAlias(Src, Len, P, Size))
+ // If it can't overlap the source or the dest, then it doesn't modref the loc.
+ if (isNoAlias(Location(Dest, Len), Loc)) {
+ if (isNoAlias(Location(Src, Len), Loc))
return NoModRef;
- return Ref;
+ // If it can't overlap the dest, then worst case it reads the loc.
+ Min = Ref;
+ } else if (isNoAlias(Location(Src, Len), Loc)) {
+ // If it can't overlap the source, then worst case it mutates the loc.
+ Min = Mod;
}
break;
}
@@ -687,11 +729,13 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// Since memset is 'accesses arguments' only, the AliasAnalysis base class
// will handle it for the variable length case.
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- unsigned Len = LenCI->getZExtValue();
+ uint64_t Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
- if (isNoAlias(Dest, Len, P, Size))
+ if (isNoAlias(Location(Dest, Len), Loc))
return NoModRef;
}
+ // We know that memset doesn't load anything.
+ Min = Mod;
break;
case Intrinsic::atomic_cmp_swap:
case Intrinsic::atomic_swap:
@@ -707,42 +751,49 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
case Intrinsic::atomic_load_umin:
if (TD) {
Value *Op1 = II->getArgOperand(0);
- unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
- if (isNoAlias(Op1, Op1Size, P, Size))
+ uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
+ MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
+ if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
return NoModRef;
}
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start: {
- unsigned PtrSize =
+ uint64_t PtrSize =
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
+ if (isNoAlias(Location(II->getArgOperand(1),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
return NoModRef;
break;
}
case Intrinsic::invariant_end: {
- unsigned PtrSize =
+ uint64_t PtrSize =
cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
+ if (isNoAlias(Location(II->getArgOperand(2),
+ PtrSize,
+ II->getMetadata(LLVMContext::MD_tbaa)),
+ Loc))
return NoModRef;
break;
}
}
// The AliasAnalysis base class has some smarts, let's use them.
- return AliasAnalysis::getModRefInfo(CS, P, Size);
+ return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}
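
For memcpy and memmove the patch now distinguishes three refined outcomes instead of two: no overlap with either operand gives NoModRef, overlap with only the source means at worst a read of the location, and overlap with only the destination means at worst a write. A standalone sketch of that case analysis over a toy Mod/Ref lattice (illustrative types, not the patch's code):

    // Toy Mod/Ref lattice: Ref and Mod are single bits, ModRef is both.
    enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

    // DestMayAlias / SrcMayAlias: whether the queried location may overlap
    // the memcpy destination / source.
    static ModRefResult memcpyModRef(bool DestMayAlias, bool SrcMayAlias) {
      if (!DestMayAlias && !SrcMayAlias)
        return NoModRef;   // touches neither operand
      if (!DestMayAlias)
        return Ref;        // can only be read through the source
      if (!SrcMayAlias)
        return Mod;        // can only be written through the destination
      return ModRef;       // may be both read and written
    }

    int main() { return memcpyModRef(false, true) == Ref ? 0 : 1; }
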
-
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
-/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(),
+/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
/// UnderlyingV2 is the same for V2.
///
AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
- const Value *V2, unsigned V2Size,
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
// If this GEP has been visited before, we're on a use-def cycle.
@@ -759,8 +810,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// out if the indexes to the GEP tell us anything about the derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize,
- UnderlyingV2, UnknownSize);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ UnderlyingV2, UnknownSize, 0);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -782,7 +833,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// to handle without it.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
assert(TD == 0 &&
- "DecomposeGEPExpression and getUnderlyingObject disagree!");
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
@@ -800,7 +851,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size);
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ V2, V2Size, V2TBAAInfo);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -817,7 +869,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// to handle without it.
if (GEP1BasePtr != UnderlyingV1) {
assert(TD == 0 &&
- "DecomposeGEPExpression and getUnderlyingObject disagree!");
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
}
@@ -831,6 +883,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
return MustAlias;
+ // If there is a difference between the pointers, but the difference is
+ // less than the size of the associated memory object, then we know
+ // that the objects are partially overlapping.
+ if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
+ GEP1BaseOffset != INT64_MIN))
+ return PartialAlias;
+ }
+
// If we have a known constant offset, see if this offset is larger than the
// access size being queried. If so, and if no variable indices can remove
// pieces of this constant, then we know we have a no-alias. For example,
@@ -850,8 +913,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// If our known offset is bigger than the access size, we know we don't have
// an alias.
if (GEP1BaseOffset) {
- if (GEP1BaseOffset >= (int64_t)V2Size ||
- GEP1BaseOffset <= -(int64_t)V1Size)
+ if (GEP1BaseOffset >= 0 ?
+ (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
+ (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
+ GEP1BaseOffset != INT64_MIN))
return NoAlias;
}
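
Both new checks compare the signed constant offset between the two pointers against the access sizes: an offset smaller than the other access's size implies a partial overlap, while an offset at least as large as it rules out any overlap (the INT64_MIN guard excludes the one offset whose negation is not representable, and UnknownSize accesses are skipped). A tiny standalone illustration of the arithmetic, assuming byte-sized units and known constant offsets:

    #include <cassert>
    #include <cstdint>

    // Convention: P1 == P2 + Offset, and V1Size/V2Size are the byte sizes
    // of the two accesses. Returns 1 for "partial overlap", 0 for "no alias".
    static int classify(int64_t Offset, uint64_t V1Size, uint64_t V2Size) {
      if (Offset >= 0)
        return (uint64_t)Offset < V2Size ? 1 : 0;
      return -(uint64_t)Offset < V1Size ? 1 : 0;
    }

    int main() {
      assert(classify(4, 8, 8) == 1);    // [P, P+8) vs [P+4, P+12): overlap
      assert(classify(8, 8, 8) == 0);    // [P, P+8) vs [P+8, P+16): disjoint
      assert(classify(-2, 4, 4) == 1);   // [P-2, P+2) vs [P, P+4): overlap
      return 0;
    }
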
@@ -861,8 +926,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
/// instruction against another.
AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
- const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
+ const MDNode *SITBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
// If this select has been visited before, we're on a use-def cycle.
// Such cycles are only valid when PHI nodes are involved or in unreachable
// code. The visitPHI function catches cycles containing PHIs, but there
@@ -875,13 +942,13 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize,
- SI2->getTrueValue(), V2Size);
+ aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
+ SI2->getTrueValue(), V2Size, V2TBAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize,
- SI2->getFalseValue(), V2Size);
+ aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
+ SI2->getFalseValue(), V2Size, V2TBAAInfo);
if (ThisAlias != Alias)
return MayAlias;
return Alias;
@@ -890,7 +957,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, SI->getTrueValue(), SISize);
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
if (Alias == MayAlias)
return MayAlias;
@@ -900,7 +967,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
Visited.erase(V2);
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, SI->getFalseValue(), SISize);
+ aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
if (ThisAlias != Alias)
return MayAlias;
return Alias;
@@ -909,8 +976,10 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
// against another.
AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
- const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
+ const MDNode *PNTBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
// The PHI node has already been visited, avoid recursion any further.
if (!Visited.insert(PN))
return MayAlias;
@@ -921,16 +990,16 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
AliasResult Alias =
- aliasCheck(PN->getIncomingValue(0), PNSize,
+ aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
- V2Size);
+ V2Size, V2TBAAInfo);
if (Alias == MayAlias)
return MayAlias;
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize,
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size);
+ V2Size, V2TBAAInfo);
if (ThisAlias != Alias)
return MayAlias;
}
@@ -951,7 +1020,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V2, V2Size, V1Srcs[0], PNSize);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V1Srcs[0], PNSize, PNTBAAInfo);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -967,7 +1037,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
// don't need to assume that V2 is being visited recursively.
Visited.erase(V2);
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
+ V, PNSize, PNTBAAInfo);
if (ThisAlias != Alias || ThisAlias == MayAlias)
return MayAlias;
}
@@ -979,8 +1050,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
// such as array references.
//
AliasAnalysis::AliasResult
-BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
+ const MDNode *V1TBAAInfo,
+ const Value *V2, uint64_t V2Size,
+ const MDNode *V2TBAAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -997,8 +1070,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = V1->getUnderlyingObject();
- const Value *O2 = V2->getUnderlyingObject();
+ const Value *O1 = GetUnderlyingObject(V1, TD);
+ const Value *O2 = GetUnderlyingObject(V2, TD);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1059,25 +1132,39 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
std::swap(V1Size, V2Size);
std::swap(O1, O2);
}
- if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1))
- return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2);
+ if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+ AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+ if (Result != MayAlias) return Result;
+ }
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
}
- if (const PHINode *PN = dyn_cast<PHINode>(V1))
- return aliasPHI(PN, V1Size, V2, V2Size);
+ if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+ AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return Result;
+ }
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
}
- if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
- return aliasSelect(S1, V1Size, V2, V2Size);
+ if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+ AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+ V2, V2Size, V2TBAAInfo);
+ if (Result != MayAlias) return Result;
+ }
- return NoAA::alias(V1, V1Size, V2, V2Size);
-}
+ // If both pointers point into the same object and one of the accesses
+ // covers the entire object, then the accesses must overlap in some
+ // way.
+ if (TD && O1 == O2)
+ if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+ return PartialAlias;
-// Make sure that anything that uses AliasAnalysis pulls in this file.
-DEFINING_FILE_FOR(BasicAliasAnalysis)
+ return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+ Location(V2, V2Size, V2TBAAInfo));
+}
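
The restructuring above stops returning each sub-analysis result directly: a MayAlias answer from aliasGEP, aliasPHI or aliasSelect now falls through so the final same-object check can still upgrade it to PartialAlias, and whatever remains is delegated to the next analysis in the chain. A compact standalone sketch of that control flow, with stubbed-out helpers in place of the real sub-analyses:

    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

    // Stub sub-analyses; in the real pass these inspect GEPs, PHIs, selects.
    static AliasResult checkGEP()    { return MayAlias; }
    static AliasResult checkPHI()    { return MayAlias; }
    static AliasResult checkSelect() { return MayAlias; }
    static bool oneAccessCoversWholeObject()    { return true; }
    static AliasResult delegateToNextAnalysis() { return MayAlias; }

    static AliasResult aliasCheckSketch() {
      // Previously each helper's result was returned directly, so a MayAlias
      // answer ended the query. Now MayAlias falls through to later checks.
      AliasResult R = checkGEP();
      if (R != MayAlias) return R;
      R = checkPHI();
      if (R != MayAlias) return R;
      R = checkSelect();
      if (R != MayAlias) return R;

      // Still unknown: if both pointers point into the same object and one
      // access covers the whole object, the accesses must overlap somehow.
      if (oneAccessCoversWholeObject())
        return PartialAlias;

      return delegateToNextAnalysis();   // chain to the next analysis
    }

    int main() { return aliasCheckSketch() == PartialAlias ? 0 : 1; }
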
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index 617a362..7bb063f 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -25,7 +25,9 @@ using namespace llvm;
namespace {
struct CFGViewer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGViewer() : FunctionPass(ID) {}
+ CFGViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
F.viewCFG();
@@ -41,12 +43,14 @@ namespace {
}
char CFGViewer::ID = 0;
-INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true);
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
namespace {
struct CFGOnlyViewer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyViewer() : FunctionPass(ID) {}
+ CFGOnlyViewer() : FunctionPass(ID) {
+ initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
F.viewCFGOnly();
@@ -63,13 +67,14 @@ namespace {
char CFGOnlyViewer::ID = 0;
INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
- "View CFG of function (with no function bodies)", false, true);
+ "View CFG of function (with no function bodies)", false, true)
namespace {
struct CFGPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(ID) {}
- explicit CFGPrinter(char &pid) : FunctionPass(pid) {}
+ CFGPrinter() : FunctionPass(ID) {
+ initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
@@ -96,13 +101,15 @@ namespace {
char CFGPrinter::ID = 0;
INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
- false, true);
+ false, true)
namespace {
struct CFGOnlyPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(ID) {}
- explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {}
+ CFGOnlyPrinter() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
errs() << "Writing '" << Filename << "'...";
@@ -128,7 +135,7 @@ namespace {
char CFGOnlyPrinter::ID = 0;
INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
"Print CFG of function to 'dot' file (with no function bodies)",
- false, true);
+ false, true)
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
index 90eae20..42a54d9 100644
--- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -95,6 +95,9 @@ bool llvm::PointerMayBeCaptured(const Value *V,
case Instruction::Load:
// Loading from a pointer does not cause it to be captured.
break;
+ case Instruction::VAArg:
+ // "va-arg" from a pointer does not cause it to be captured.
+ break;
case Instruction::Ret:
if (ReturnCaptures)
return true;
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 0bf7967..cd8d52c 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/FEnv.h"
#include <cerrno>
#include <cmath>
using namespace llvm;
@@ -53,7 +54,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
- return FoldBitCast(ConstantVector::get(&Ops, 1), DestTy, TD);
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
}
// If this is a bitcast from constant vector -> vector, fold it.
@@ -166,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
}
}
- return ConstantVector::get(Result.data(), Result.size());
+ return ConstantVector::get(Result);
}
@@ -339,6 +340,13 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
return true;
}
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr &&
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
+ }
+
// Otherwise, unknown initializer type.
return false;
}
@@ -466,7 +474,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getUnderlyingObject())){
+ if (GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
if (GV->getInitializer()->isNullValue())
@@ -537,7 +546,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
for (unsigned i = 1; i != NumOps; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- reinterpret_cast<Value *const *>(Ops+1),
+ reinterpret_cast<Value *const *>(Ops+1),
i-1))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
@@ -567,16 +576,35 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
-
- unsigned BitWidth =
- TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
+
+ const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
for (unsigned i = 1; i != NumOps; ++i)
- if (!isa<ConstantInt>(Ops[i]))
+ if (!isa<ConstantInt>(Ops[i])) {
+
+ // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+ // "inttoptr (sub (ptrtoint Ptr), V)"
+ if (NumOps == 2 &&
+ cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
+ assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ "CastGEPIndices didn't canonicalize index types!");
+ if (CE && CE->getOpcode() == Instruction::Sub &&
+ CE->getOperand(0)->isNullValue()) {
+ Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+ Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+ Res = ConstantExpr::getIntToPtr(Res, ResultTy);
+ if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
+ Res = ConstantFoldConstantExpression(ResCE, TD);
+ return Res;
+ }
+ }
return 0;
+ }
+ unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
APInt Offset = APInt(BitWidth,
TD->getIndexedOffset(Ptr->getType(),
(Value**)Ops+1, NumOps-1));
@@ -609,10 +637,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
APInt BasePtr(BitWidth, 0);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
if (CE->getOpcode() == Instruction::IntToPtr)
- if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
- BasePtr = Base->getValue();
- BasePtr.zextOrTrunc(BitWidth);
- }
+ if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
+ BasePtr = Base->getValue().zextOrTrunc(BitWidth);
if (Ptr->isNullValue() || BasePtr != 0) {
Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
@@ -638,12 +664,19 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
- return 0;
- APInt NewIdx = Offset.udiv(ElemSize);
- Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()),
- NewIdx));
+ // The element size is 0. This may be [0 x Ty]*, so just use a zero
+ // index for this level and proceed to the next level to see if it can
+ // accommodate the offset.
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+ else {
+ // The element size is non-zero. Divide the offset by the element
+ // size (rounding down) to compute the index at this level.
+ APInt NewIdx = Offset.udiv(ElemSize);
+ Offset -= NewIdx * ElemSize;
+ NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+ }
Ty = ATy->getElementType();
} else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
// Determine which field of the struct the offset points into. The
@@ -687,27 +720,34 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
-
-/// ConstantFoldInstruction - Attempt to constant fold the specified
-/// instruction. If successful, the constant result is returned, if not, null
-/// is returned. Note that this function can only fail when attempting to fold
-/// instructions like loads and stores, which have no constant expression form.
-///
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+ // Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- if (PN->getNumIncomingValues() == 0)
- return UndefValue::get(PN->getType());
-
- Constant *Result = dyn_cast<Constant>(PN->getIncomingValue(0));
- if (Result == 0) return 0;
-
- // Handle PHI nodes specially here...
- for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) != Result && PN->getIncomingValue(i) != PN)
- return 0; // Not all the same incoming constants...
+ Constant *CommonValue = 0;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is undef then skip it. Note that while we could
+ // skip the value if it is equal to the phi node itself we choose not to
+ // because that would break the rule that constant folding only applies if
+ // all operands are constants.
+ if (isa<UndefValue>(Incoming))
+ continue;
+ // If the incoming value is not a constant, or is a different constant to
+ // the one we saw previously, then give up.
+ Constant *C = dyn_cast<Constant>(Incoming);
+ if (!C || (CommonValue && C != CommonValue))
+ return 0;
+ CommonValue = C;
+ }
- // If we reach here, all incoming values are the same constant.
- return Result;
+ // If we reach here, all incoming values are the same constant or undef.
+ return CommonValue ? CommonValue : UndefValue::get(PN->getType());
}
// Scan the operand list, checking to see if they are all constants, if so,
@@ -725,7 +765,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
-
+
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ return ConstantExpr::getInsertValue(
+ cast<Constant>(IVI->getAggregateOperand()),
+ cast<Constant>(IVI->getInsertedValueOperand()),
+ IVI->idx_begin(), IVI->getNumIndices());
+
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ return ConstantExpr::getExtractValue(
+ cast<Constant>(EVI->getAggregateOperand()),
+ EVI->idx_begin(), EVI->getNumIndices());
+
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
Ops.data(), Ops.size(), TD);
}
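
The rewritten PHI handling folds a phi whose incoming values are all the same constant C (ignoring undefs) to C, and one whose incoming values are all undef to undef, while any non-constant or conflicting constant still blocks folding. A minimal standalone model of that rule, using an empty optional to stand in for undef and an int for a constant:

    #include <cassert>
    #include <optional>
    #include <vector>

    // Model: std::nullopt stands for 'undef', an int stands for a constant.
    // (Non-constant incoming values, which make the real code give up, are
    // left out of the model.)
    struct FoldResult {
      bool Folded;               // false: incoming constants disagree
      std::optional<int> Value;  // nullopt: folds to undef
    };

    static FoldResult foldPHI(const std::vector<std::optional<int>> &Incoming) {
      std::optional<int> Common;
      for (const auto &In : Incoming) {
        if (!In)
          continue;                          // undef contributes nothing
        if (Common && *Common != *In)
          return {false, std::nullopt};      // different constants: give up
        Common = In;
      }
      // All the same constant, or all undef.
      return {true, Common};
    }

    int main() {
      assert(foldPHI({std::nullopt, 7, 7}).Value == 7);      // folds to 7
      assert(!foldPHI({std::nullopt, std::nullopt}).Value);   // folds to undef
      assert(!foldPHI({3, 4}).Folded);                        // cannot fold
      return 0;
    }
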
@@ -736,7 +787,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
- for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+ i != e; ++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
@@ -1000,8 +1052,17 @@ llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::usub_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::ssub_with_overflow:
+ case Intrinsic::smul_with_overflow:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
return true;
default:
return false;
@@ -1039,10 +1100,10 @@ llvm::canConstantFoldCallTo(const Function *F) {
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
const Type *Ty) {
- errno = 0;
+ sys::llvm_fenv_clearexcept();
V = NativeFP(V);
- if (errno != 0) {
- errno = 0;
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
return 0;
}
@@ -1056,10 +1117,10 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
double V, double W, const Type *Ty) {
- errno = 0;
+ sys::llvm_fenv_clearexcept();
V = NativeFP(V, W);
- if (errno != 0) {
- errno = 0;
+ if (sys::llvm_fenv_testexcept()) {
+ sys::llvm_fenv_clearexcept();
return 0;
}
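
Switching from errno to the llvm_fenv_* helpers matters because many libm failure cases are reported through floating-point exception flags rather than errno. A standalone sketch of the underlying <cfenv> pattern those helpers presumably wrap; the exact flag set LLVM tests is an assumption here:

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    // Fold log(V) at "compile time" only if evaluating it raises no FP
    // exception other than "inexact" -- the same clear/evaluate/test shape
    // as ConstantFoldFP above. (Strictly, #pragma STDC FENV_ACCESS ON is
    // required for fully defined flag access.)
    static bool tryFoldLog(double V, double &Result) {
      std::feclearexcept(FE_ALL_EXCEPT);                     // clearexcept()
      Result = std::log(V);
      if (std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) {  // testexcept()
        std::feclearexcept(FE_ALL_EXCEPT);
        return false;                                        // refuse to fold
      }
      return true;
    }

    int main() {
      double R;
      std::printf("log(2.0)  folds: %d\n", tryFoldLog(2.0, R));   // 1
      std::printf("log(-1.0) folds: %d\n", tryFoldLog(-1.0, R));  // 0 (FE_INVALID)
      return 0;
    }
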
@@ -1071,6 +1132,36 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return 0; // dummy return to suppress warning
}
+/// ConstantFoldConvertToInt - Attempt to fold an SSE floating point to integer
+/// conversion of a constant floating point. If roundTowardZero is false, the
+/// default IEEE rounding is used (toward nearest, ties to even). This matches
+/// the behavior of the non-truncating SSE instructions in the default rounding
+/// mode. The desired integer type Ty is used to select how many bits are
+/// available for the result. Returns null if the conversion cannot be
+/// performed, otherwise returns the Constant value resulting from the
+/// conversion.
+static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
+ const Type *Ty) {
+ assert(Op && "Called with NULL operand");
+ APFloat Val(Op->getValueAPF());
+
+ // All of these conversion intrinsics form an integer of at most 64 bits.
+ unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ assert(ResultWidth <= 64 &&
+ "Can only constant fold conversions to 64 and 32 bit ints");
+
+ uint64_t UIntVal;
+ bool isExact = false;
+ APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
+ : APFloat::rmNearestTiesToEven;
+ APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
+ /*isSigned=*/true, mode,
+ &isExact);
+ if (status != APFloat::opOK && status != APFloat::opInexact)
+ return 0;
+ return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+}
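
The two intrinsic families handled by this helper differ only in rounding: the cvtt* forms truncate toward zero, while the cvt* forms use the current rounding mode, which defaults to round-to-nearest with ties to even. A tiny standalone illustration of that difference using plain libm calls rather than the APFloat path above:

    #include <cassert>
    #include <cmath>

    int main() {
      // cvttss2si-style: truncate toward zero.
      assert(static_cast<long>(std::trunc(2.5))  == 2);
      assert(static_cast<long>(std::trunc(-1.5)) == -1);

      // cvtss2si-style: round to nearest, ties to even (the default FP
      // rounding mode, which std::nearbyint honours).
      assert(static_cast<long>(std::nearbyint(2.5))  == 2);   // tie to even
      assert(static_cast<long>(std::nearbyint(3.5))  == 4);
      assert(static_cast<long>(std::nearbyint(-1.5)) == -2);
      return 0;
    }
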
+
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
@@ -1082,7 +1173,7 @@ llvm::ConstantFoldCall(Function *F,
const Type *Ty = F->getReturnType();
if (NumOperands == 1) {
if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
- if (Name == "llvm.convert.to.fp16") {
+ if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
APFloat Val(Op->getValueAPF());
bool lost = false;
@@ -1093,6 +1184,13 @@ llvm::ConstantFoldCall(Function *F,
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
+
+ /// We only fold functions with finite arguments. Folding NaN and inf is
+ /// likely to be aborted with an exception anyway, and some host libms
+ /// have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return 0;
+
/// Currently APFloat versions of these functions do not exist, so we use
/// the host native double versions. Float versions are not called
/// directly but for all these it is true (float)(f((double)arg)) ==
@@ -1133,8 +1231,8 @@ llvm::ConstantFoldCall(Function *F,
return ConstantFoldFP(log, V, Ty);
else if (Name == "log10" && V > 0)
return ConstantFoldFP(log10, V, Ty);
- else if (Name == "llvm.sqrt.f32" ||
- Name == "llvm.sqrt.f64") {
+ else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+ (Ty->isFloatTy() || Ty->isDoubleTy())) {
if (V >= -0.0)
return ConstantFoldFP(sqrt, V, Ty);
else // Undefined
@@ -1164,18 +1262,18 @@ llvm::ConstantFoldCall(Function *F,
}
return 0;
}
-
-
+
if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
- if (Name.startswith("llvm.bswap"))
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::bswap:
return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
- else if (Name.startswith("llvm.ctpop"))
+ case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- else if (Name.startswith("llvm.cttz"))
+ case Intrinsic::cttz:
return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- else if (Name.startswith("llvm.ctlz"))
+ case Intrinsic::ctlz:
return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
- else if (Name == "llvm.convert.from.fp16") {
+ case Intrinsic::convert_from_fp16: {
APFloat Val(Op->getValue());
bool lost = false;
@@ -1183,24 +1281,44 @@ llvm::ConstantFoldCall(Function *F,
Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
// Conversion is always precise.
- status = status;
+ (void)status;
assert(status == APFloat::opOK && !lost &&
"Precision lost during fp16 constfolding");
return ConstantFP::get(F->getContext(), Val);
}
- return 0;
+ default:
+ return 0;
+ }
}
-
+
+ if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+ return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ }
+ }
+
if (isa<UndefValue>(Operands[0])) {
- if (Name.startswith("llvm.bswap"))
+ if (F->getIntrinsicID() == Intrinsic::bswap)
return Operands[0];
return 0;
}
return 0;
}
-
+
if (NumOperands == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
@@ -1223,11 +1341,11 @@ llvm::ConstantFoldCall(Function *F,
if (Name == "atan2")
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
- if (Name == "llvm.powi.f32")
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
return ConstantFP::get(F->getContext(),
APFloat((float)std::pow((float)Op1V,
(int)Op2C->getZExtValue())));
- if (Name == "llvm.powi.f64")
+ if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
return ConstantFP::get(F->getContext(),
APFloat((double)std::pow((double)Op1V,
(int)Op2C->getZExtValue())));
@@ -1240,42 +1358,37 @@ llvm::ConstantFoldCall(Function *F,
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
default: break;
- case Intrinsic::uadd_with_overflow: {
- Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
- Constant *Ops[] = {
- Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow.
- };
- return ConstantStruct::get(F->getContext(), Ops, 2, false);
- }
- case Intrinsic::usub_with_overflow: {
- Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ APInt Res;
+ bool Overflow;
+ switch (F->getIntrinsicID()) {
+ default: assert(0 && "Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+ break;
+ case Intrinsic::smul_with_overflow:
+ Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+ break;
+ }
Constant *Ops[] = {
- Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow.
+ ConstantInt::get(F->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
};
return ConstantStruct::get(F->getContext(), Ops, 2, false);
}
- case Intrinsic::sadd_with_overflow: {
- Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result.
- Constant *Overflow = ConstantExpr::getSelect(
- ConstantExpr::getICmp(CmpInst::ICMP_SGT,
- ConstantInt::get(Op1->getType(), 0), Op1),
- ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2),
- ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow.
-
- Constant *Ops[] = { Res, Overflow };
- return ConstantStruct::get(F->getContext(), Ops, 2, false);
- }
- case Intrinsic::ssub_with_overflow: {
- Constant *Res = ConstantExpr::getSub(Op1, Op2); // result.
- Constant *Overflow = ConstantExpr::getSelect(
- ConstantExpr::getICmp(CmpInst::ICMP_SGT,
- ConstantInt::get(Op2->getType(), 0), Op2),
- ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1),
- ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow.
-
- Constant *Ops[] = { Res, Overflow };
- return ConstantStruct::get(F->getContext(), Ops, 2, false);
- }
}
}
@@ -1285,4 +1398,3 @@ llvm::ConstantFoldCall(Function *F,
}
return 0;
}
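
All five *.with.overflow cases are now folded through APInt's overflow-aware arithmetic helpers, which hand back the wrapped result plus an overflow flag and replace the hand-rolled icmp/select sequences deleted above. A small usage sketch of one such helper; the APInt calls are the ones the patch uses, while the surrounding scaffolding is illustrative:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      bool Overflow = false;

      // 100 + 27 as signed i8: fits, no overflow.
      APInt A(8, 100), B(8, 27);
      APInt Sum = A.sadd_ov(B, Overflow);
      assert(!Overflow && Sum.getSExtValue() == 127);

      // 100 + 28 as signed i8: wraps to -128 and reports overflow, exactly
      // the pair of values packed into the folded result struct above.
      APInt C(8, 28);
      Sum = A.sadd_ov(C, Overflow);
      assert(Overflow && Sum.getSExtValue() == -128);
      return 0;
    }
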
-
diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 0000000..c1072df
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,801 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+ assert((Tag & LLVMDebugVersionMask) == 0 &&
+ "Tag too large for debug encoding!");
+ return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+ : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename,
+ StringRef Directory, StringRef Producer,
+ bool isOptimized, StringRef Flags,
+ unsigned RunTimeVer) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ MDString::get(VMContext, Producer),
+ // Deprecate isMain field.
+ ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ MDString::get(VMContext, Flags),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+ };
+ TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
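
The new DIBuilder gives front ends one object to thread through while emitting debug metadata: the compile unit is created once and later Create* calls hang their descriptors off it. A short usage sketch assembled only from entry points defined in this file; the header paths and the DWARF language/encoding constants chosen are assumptions made for the example:

    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/Dwarf.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("example", Ctx);

      DIBuilder DIB(M);
      // One compile unit per DIBuilder instance (stored in TheCU above).
      DIB.CreateCompileUnit(dwarf::DW_LANG_C99, "example.c", "/tmp",
                            "toy front end", /*isOptimized=*/false,
                            /*Flags=*/"", /*RunTimeVer=*/0);

      // Descriptors created afterwards reference that compile unit.
      DIFile File = DIB.CreateFile("example.c", "/tmp");
      DIType IntTy = DIB.CreateBasicType("int", /*SizeInBits=*/32,
                                         /*AlignInBits=*/32,
                                         dwarf::DW_ATE_signed);
      (void)File; (void)IntTy;
      return 0;
    }
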
+
+/// CreateFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) {
+ assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+ MDString::get(VMContext, Filename),
+ MDString::get(VMContext, Directory),
+ TheCU
+ };
+ return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+ };
+ return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateBasicType - Create debugging information entry for a basic
+/// type, e.g. 'char'.
+DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits,
+ uint64_t AlignInBits,
+ unsigned Encoding) {
+ // Basic types are encoded in DIBasicType format. Line number, filename,
+ // offset and flags are always empty here.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
+ ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) {
+ // Qualified types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ TheCU,
+ MDString::get(VMContext, StringRef()), // Empty name.
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FromTy
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreatePointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits,
+ uint64_t AlignInBits, StringRef Name) {
+ // Pointer types are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+ TheCU,
+ MDString::get(VMContext, Name),
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ PointeeTy
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::CreateReferenceType(DIType RTy) {
+ // References are encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+ TheCU,
+ NULL, // Name
+ NULL, // Filename
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ RTy
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File,
+ unsigned LineNo) {
+ // typedefs are encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid typedef type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+ Ty.getContext(),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) {
+ // A friend relationship is encoded in DIDerivedType format.
+ assert(Ty.Verify() && "Invalid type!");
+ assert(FriendTy.Verify() && "Invalid friend type!");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+ FriendTy
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateInheritance - Create debugging information entry to establish
+/// an inheritance relationship between two types.
+DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy,
+ uint64_t BaseOffset, unsigned Flags) {
+ // TAG_inheritance is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+ Ty,
+ NULL, // Name
+ Ty.getFile(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+ ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ BaseTy
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateMemberType - Create debugging information entry for a member.
+DIType DIBuilder::CreateMemberType(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ File, // Or TheCU ? Ty ?
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateClassType - Create debugging information entry for a class.
+DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType DerivedFrom, DIArray Elements,
+ MDNode *VTableHoder, MDNode *TemplateParams) {
+ // TAG_class_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ DerivedFrom,
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ VTableHoder,
+ TemplateParams
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0],
+ array_lengthof(Elts)));
+}
+
+/// CreateTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name,
+ DIType Ty, uint64_t Val,
+ MDNode *File, unsigned LineNo,
+ unsigned ColumnNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+ Context,
+ MDString::get(VMContext, Name),
+ Ty,
+ ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+ };
+ return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0],
+ array_lengthof(Elts)));
+}
+
+/// CreateStructType - Create debugging information entry for a struct.
+DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ unsigned Flags, DIArray Elements,
+ unsigned RunTimeLang) {
+ // TAG_structure_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+ Context,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateUnionType - Create debugging information entry for a union.
+DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name,
+ DIFile File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint64_t AlignInBits, unsigned Flags,
+ DIArray Elements, unsigned RunTimeLang) {
+ // TAG_union_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateSubroutineType - Create subroutine type.
+DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) {
+ // TAG_subroutine_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+ File,
+ MDString::get(VMContext, ""),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ParameterTypes,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateEnumerationType - Create debugging information entry for an
+/// enumeration.
+DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits,
+ uint64_t AlignInBits, DIArray Elements) {
+ // TAG_enumeration_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Elements,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ return DIType(Node);
+}
+
+/// CreateArrayType - Create debugging information entry for an array.
+DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_array_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits,
+ DIType Ty, DIArray Subscripts) {
+ // TAG_vector_type is encoded in DICompositeType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+ TheCU,
+ MDString::get(VMContext, ""),
+ TheCU,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ Ty,
+ Subscripts,
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ };
+ return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// CreateArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::CreateArtificialType(DIType Ty) {
+ if (Ty.isArtificial())
+ return Ty;
+
+ SmallVector<Value *, 9> Elts;
+ MDNode *N = Ty;
+ assert (N && "Unexpected input DIType!");
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i))
+ Elts.push_back(V);
+ else
+ Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+ }
+
+ unsigned CurFlags = Ty.getFlags();
+ CurFlags = CurFlags | DIType::FlagArtificial;
+
+ // Flags are stored at this slot.
+ Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+ return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// RetainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::RetainType(DIType T) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+ NMD->addOperand(T);
+}
+
+/// CreateUnspecifiedParameter - Create an unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::CreateUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::CreateTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+ return DIType(Node);
+}
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::CreateTemporaryType(DIFile F) {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, DW_TAG_base_type),
+ F.getCompileUnit(),
+ NULL,
+ F
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+ return DIType(Node);
+}
+
+/// GetOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) {
+ if (NumElements == 0) {
+ Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, &Null, 1));
+ }
+ return DIArray(MDNode::get(VMContext, Elements, NumElements));
+}
+
+/// GetOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+ ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+ };
+
+ return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+}
+
+/// CreateGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ TheCU,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// CreateStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+CreateStaticVariable(DIDescriptor Context, StringRef Name,
+ StringRef LinkageName, DIFile F, unsigned LineNumber,
+ DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+ Val
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+ return DIGlobalVariable(Node);
+}
+
+/// CreateLocalVariable - Create a new descriptor for the specified local
+/// variable.
+DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile File,
+ unsigned LineNo, DIType Ty,
+ bool AlwaysPreserve, unsigned Flags) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+ if (AlwaysPreserve) {
+    // The optimizer may remove local variables. If there is an interest
+    // in preserving variable info in such a situation, stash it in a
+    // named mdnode.
+ DISubprogram Fn(getDISubprogram(Scope));
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+ FnLocals->addOperand(Node);
+ }
+ return DIVariable(Node);
+}
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope,
+ StringRef Name, DIFile F,
+ unsigned LineNo,
+ DIType Ty, Value *const *Addr,
+ unsigned NumAddr) {
+ SmallVector<Value *, 15> Elts;
+ Elts.push_back(GetTagConstant(VMContext, Tag));
+ Elts.push_back(Scope);
+ Elts.push_back(MDString::get(VMContext, Name));
+ Elts.push_back(F);
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+ Elts.push_back(Ty);
+ Elts.append(Addr, Addr+NumAddr);
+
+ return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// CreateFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::CreateFunction(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile File, unsigned LineNo,
+ DIType Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned Flags, bool isOptimized,
+ Function *Fn) {
+
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// CreateMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::CreateMethod(DIDescriptor Context,
+ StringRef Name,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,
+ bool isLocalToUnit,
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ MDNode *VTableHolder,
+ unsigned Flags,
+ bool isOptimized,
+ Function *Fn) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ VTableHolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn
+ };
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
+}
+
+/// CreateNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name,
+ DIFile File, unsigned LineNo) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+ Scope,
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+ };
+ return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File,
+ unsigned Line, unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks by using a unique id.
+ static unsigned int unique_id = 0;
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+ Scope,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+ };
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(Storage && "no storage passed to dbg.declare");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+ if (!DeclareFn)
+ DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+ Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+
+ // If this block already has a terminator then insert this intrinsic
+ // before the terminator.
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+ else
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+ DIVariable VarInfo,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+ ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+ VarInfo };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
+
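As a rough usage sketch (not part of the patch itself), a frontend might drive the DIBuilder interface added above along the following lines. The names DIB, Unit, IntTy, F and LocalAlloca are assumed to already exist: a DIBuilder holding a compile unit, a DIFile for the source file, a basic DIType, the llvm::Function being described, and an alloca for one of its locals.

  // Hypothetical client code; DIB, Unit, IntTy, F and LocalAlloca are
  // assumed to exist as described above.
  Value *ParamTys[] = { IntTy };
  DIArray ParamArray = DIB.GetOrCreateArray(ParamTys, 1);
  DIType FnTy = DIB.CreateSubroutineType(Unit, ParamArray);

  DISubprogram SP =
    DIB.CreateFunction(Unit, "foo", "foo", Unit, /*LineNo=*/1, FnTy,
                       /*isLocalToUnit=*/false, /*isDefinition=*/true,
                       /*Flags=*/0, /*isOptimized=*/false, F);

  DIVariable DV =
    DIB.CreateLocalVariable(dwarf::DW_TAG_auto_variable, SP, "x", Unit,
                            /*LineNo=*/2, IntTy, /*AlwaysPreserve=*/true,
                            /*Flags=*/0);
  DIB.InsertDeclare(LocalAlloca, DV, &F->getEntryBlock());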
diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
index 0567750..b23c351 100644
--- a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -40,7 +41,9 @@ namespace {
void printVariableDeclaration(const Value *V);
public:
static char ID; // Pass identification
- PrintDbgInfo() : FunctionPass(ID), Out(errs()) {}
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+ initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -48,12 +51,124 @@ namespace {
}
};
char PrintDbgInfo::ID = 0;
- INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
- "Print debug info in human readable form", false, false);
}
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false)
+
FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isGlobalVariable())
+ continue;
+ if (DIGlobalVariable(DIG).getGlobal() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+ const Module *M = V->getParent();
+ NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+ if (!NMD)
+ return 0;
+
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (!DIG.isSubprogram())
+ continue;
+ if (DISubprogram(DIG).getFunction() == V)
+ return DIG;
+ }
+ return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+ V = V->stripPointerCasts();
+
+ if (!isa<Instruction>(V) && !isa<Argument>(V))
+ return 0;
+
+ const Function *F = NULL;
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ F = I->getParent()->getParent();
+ else if (const Argument *A = dyn_cast<Argument>(V))
+ F = A->getParent();
+
+ for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+ for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+ BI != BE; ++BI)
+ if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DDI->getAddress() == V)
+ return DDI;
+
+ return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+ std::string &Type, unsigned &LineNo,
+ std::string &File, std::string &Dir) {
+ DICompileUnit Unit;
+ DIType TypeD;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+ Value *DIGV = findDbgGlobalDeclare(GV);
+ if (!DIGV) return false;
+ DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){
+ Value *DIF = findDbgSubprogramDeclare(F);
+ if (!DIF) return false;
+ DISubprogram Var(cast<MDNode>(DIF));
+
+ StringRef D = Var.getDisplayName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ } else {
+ const DbgDeclareInst *DDI = findDbgDeclare(V);
+ if (!DDI) return false;
+ DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+ StringRef D = Var.getName();
+ if (!D.empty())
+ DisplayName = D;
+ LineNo = Var.getLineNumber();
+ Unit = Var.getCompileUnit();
+ TypeD = Var.getType();
+ }
+
+ StringRef T = TypeD.getName();
+ if (!T.empty())
+ Type = T;
+ StringRef F = Unit.getFilename();
+ if (!F.empty())
+ File = F;
+ StringRef D = Unit.getDirectory();
+ if (!D.empty())
+ Dir = D;
+ return true;
+}
+
void PrintDbgInfo::printVariableDeclaration(const Value *V) {
std::string DisplayName, File, Directory, Type;
unsigned LineNo;
@@ -63,8 +178,12 @@ void PrintDbgInfo::printVariableDeclaration(const Value *V) {
Out << "; ";
WriteAsOperand(Out, V, false, 0);
- Out << " is variable " << DisplayName
- << " of type " << Type << " declared at ";
+ if (isa<Function>(V))
+ Out << " is function " << DisplayName
+ << " of type " << Type << " declared at ";
+ else
+ Out << " is variable " << DisplayName
+ << " of type " << Type << " declared at ";
if (PrintDirectory)
Out << Directory << "/";
diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp
index 5ca89c6..9db1456 100644
--- a/contrib/llvm/lib/Analysis/DebugInfo.cpp
+++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp
@@ -109,7 +109,9 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
}
unsigned DIVariable::getNumAddrElements() const {
- return DbgNode->getNumOperands()-6;
+ if (getVersion() <= llvm::LLVMDebugVersion8)
+ return DbgNode->getNumOperands()-6;
+ return DbgNode->getNumOperands()-7;
}
@@ -197,6 +199,12 @@ bool DIDescriptor::isGlobal() const {
return isGlobalVariable();
}
+/// isUnspecifiedParameter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
/// isScope - Return true if the specified tag is one of the scope
/// related tag.
bool DIDescriptor::isScope() const {
@@ -213,6 +221,18 @@ bool DIDescriptor::isScope() const {
return false;
}
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
bool DIDescriptor::isCompileUnit() const {
return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
@@ -280,6 +300,26 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
}
}
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+ if (!DbgNode)
+ return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+ // legitimate situations where they want to replaceAllUsesWith() on something
+ // which, due to uniquing, has merged with the source. We shield clients from
+ // this detail by allowing a value to be replaced with replaceAllUsesWith()
+ // itself.
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
+ MDNode::deleteTemporary(Node);
+ }
+}
+
/// Verify - Verify that a compile unit is well formed.
bool DICompileUnit::Verify() const {
if (!DbgNode)
@@ -297,9 +337,13 @@ bool DIType::Verify() const {
return false;
if (!getContext().Verify())
return false;
-
- DICompileUnit CU = getCompileUnit();
- if (!CU.Verify())
+ unsigned Tag = getTag();
+ if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+ && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+ && Tag != dwarf::DW_TAG_enumeration_type
+ && getFilename().empty())
return false;
return true;
}
@@ -701,15 +745,13 @@ Constant *DIFactory::GetTagConstant(unsigned TAG) {
/// GetOrCreateArray - Create an descriptor for an array of descriptors.
/// This implicitly uniques the arrays created.
DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
- SmallVector<Value*, 16> Elts;
-
- if (NumTys == 0)
- Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
- else
- for (unsigned i = 0; i != NumTys; ++i)
- Elts.push_back(Tys[i]);
+ if (NumTys == 0) {
+ Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+ return DIArray(MDNode::get(VMContext, &Null, 1));
+ }
- return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
+ SmallVector<Value *, 16> Elts(Tys, Tys+NumTys);
+ return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
}
/// GetOrCreateSubrange - Create a descriptor for a value range. This
@@ -724,7 +766,14 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
}
-
+/// CreateUnspecifiedParameter - Create an unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIFactory::CreateUnspecifiedParameter() {
+ Value *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_unspecified_parameters)
+ };
+ return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
@@ -946,7 +995,6 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
return DICompositeType(Node);
}
-
/// CreateTemporaryType - Create a temporary forward-declared type.
DIType DIFactory::CreateTemporaryType() {
// Give the temporary MDNode a tag. It doesn't matter what tag we
@@ -958,6 +1006,19 @@ DIType DIFactory::CreateTemporaryType() {
return DIType(Node);
}
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIFactory::CreateTemporaryType(DIFile F) {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(DW_TAG_base_type),
+ F.getCompileUnit(),
+ NULL,
+ F
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+ return DIType(Node);
+}
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
@@ -1011,7 +1072,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
bool isDefinition,
unsigned VK, unsigned VIndex,
DIType ContainingType,
- bool isArtificial,
+ unsigned Flags,
bool isOptimized,
Function *Fn) {
@@ -1030,7 +1091,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
ContainingType,
- ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn
};
@@ -1064,7 +1125,7 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
DeclNode->getOperand(11), // Virtuality
DeclNode->getOperand(12), // VIndex
DeclNode->getOperand(13), // Containting Type
- DeclNode->getOperand(14), // isArtificial
+ DeclNode->getOperand(14), // Flags
DeclNode->getOperand(15), // isOptimized
SPDeclaration.getFunction()
};
@@ -1142,12 +1203,47 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
return DIGlobalVariable(Node);
}
+/// fixupObjcLikeName - Replace the special characters typically used in
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(std::string &Str) {
+ for (size_t i = 0, e = Str.size(); i < e; ++i) {
+ char C = Str[i];
+ if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
+ C == '(' || C == ')')
+ Str[i] = '.';
+ }
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+ SmallString<32> Out;
+ if (FuncName.find('[') == StringRef::npos)
+ return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+ .toStringRef(Out));
+ std::string Name = FuncName;
+ fixupObjcLikeName(Name);
+ return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+ .toStringRef(Out));
+}
+
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+ if (FuncName.find('[') == StringRef::npos)
+ return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
+ std::string Name = FuncName;
+ fixupObjcLikeName(Name);
+ return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+}
+
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
StringRef Name,
DIFile F,
unsigned LineNo,
- DIType Ty, bool AlwaysPreserve) {
+ DIType Ty, bool AlwaysPreserve,
+ unsigned Flags) {
Value *Elts[] = {
GetTagConstant(Tag),
Context,
@@ -1155,8 +1251,9 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Ty,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
};
- MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 7);
if (AlwaysPreserve) {
// The optimizer may remove local variable. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -1169,9 +1266,8 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
if (FName.startswith(StringRef(&One, 1)))
FName = FName.substr(1);
- SmallString<32> Out;
- NamedMDNode *FnLocals =
- M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
+
+ NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
FnLocals->addOperand(Node);
}
return DIVariable(Node);
@@ -1181,21 +1277,20 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
/// CreateComplexVariable - Create a new descriptor for the specified variable
/// which has a complex address expression for its address.
DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
- const std::string &Name,
- DIFile F,
+ StringRef Name, DIFile F,
unsigned LineNo,
- DIType Ty,
- SmallVector<Value *, 9> &addr) {
- SmallVector<Value *, 9> Elts;
+ DIType Ty, Value *const *Addr,
+ unsigned NumAddr) {
+ SmallVector<Value *, 15> Elts;
Elts.push_back(GetTagConstant(Tag));
Elts.push_back(Context);
Elts.push_back(MDString::get(VMContext, Name));
Elts.push_back(F);
Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
Elts.push_back(Ty);
- Elts.insert(Elts.end(), addr.begin(), addr.end());
+ Elts.append(Addr, Addr+NumAddr);
- return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
+ return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
}
@@ -1309,6 +1404,14 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
}
+// RecordType - Record DIType in a module such that it is not lost even if
+// it is not referenced through debug info anchors.
+void DIFactory::RecordType(DIType T) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+ NMD->addOperand(T);
+}
+
+
//===----------------------------------------------------------------------===//
// DebugInfoFinder implementations.
//===----------------------------------------------------------------------===//
@@ -1472,89 +1575,6 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
return true;
}
-/// Find the debug info descriptor corresponding to this global variable.
-static Value *findDbgGlobalDeclare(GlobalVariable *V) {
- const Module *M = V->getParent();
- NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
- if (!NMD)
- return 0;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
- if (!DIG.isGlobalVariable())
- continue;
- if (DIGlobalVariable(DIG).getGlobal() == V)
- return DIG;
- }
- return 0;
-}
-
-/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
-/// It looks through pointer casts too.
-static const DbgDeclareInst *findDbgDeclare(const Value *V) {
- V = V->stripPointerCasts();
-
- if (!isa<Instruction>(V) && !isa<Argument>(V))
- return 0;
-
- const Function *F = NULL;
- if (const Instruction *I = dyn_cast<Instruction>(V))
- F = I->getParent()->getParent();
- else if (const Argument *A = dyn_cast<Argument>(V))
- F = A->getParent();
-
- for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
- for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
- BI != BE; ++BI)
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
- if (DDI->getAddress() == V)
- return DDI;
-
- return 0;
-}
-
-bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
- std::string &Type, unsigned &LineNo,
- std::string &File, std::string &Dir) {
- DICompileUnit Unit;
- DIType TypeD;
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
- Value *DIGV = findDbgGlobalDeclare(GV);
- if (!DIGV) return false;
- DIGlobalVariable Var(cast<MDNode>(DIGV));
-
- StringRef D = Var.getDisplayName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- } else {
- const DbgDeclareInst *DDI = findDbgDeclare(V);
- if (!DDI) return false;
- DIVariable Var(cast<MDNode>(DDI->getVariable()));
-
- StringRef D = Var.getName();
- if (!D.empty())
- DisplayName = D;
- LineNo = Var.getLineNumber();
- Unit = Var.getCompileUnit();
- TypeD = Var.getType();
- }
-
- StringRef T = TypeD.getName();
- if (!T.empty())
- Type = T;
- StringRef F = Unit.getFilename();
- if (!F.empty())
- File = F;
- StringRef D = Unit.getDirectory();
- if (!D.empty())
- Dir = D;
- return true;
-}
-
/// getDISubprogram - Find subprogram that is enclosing this scope.
DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DIDescriptor D(Scope);
diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp
index 9f34094..cde4314 100644
--- a/contrib/llvm/lib/Analysis/DomPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp
@@ -19,8 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DomPrinter.h"
-
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/DOTGraphTraitsPass.h"
#include "llvm/Analysis/PostDominators.h"
@@ -86,74 +84,90 @@ namespace {
struct DomViewer
: public DOTGraphTraitsViewer<DominatorTree, false> {
static char ID;
- DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){}
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+ initializeDomViewerPass(*PassRegistry::getPassRegistry());
+ }
};
struct DomOnlyViewer
: public DOTGraphTraitsViewer<DominatorTree, true> {
static char ID;
- DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){}
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+ initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
};
struct PostDomViewer
: public DOTGraphTraitsViewer<PostDominatorTree, false> {
static char ID;
PostDomViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+ initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+ }
};
struct PostDomOnlyViewer
: public DOTGraphTraitsViewer<PostDominatorTree, true> {
static char ID;
PostDomOnlyViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+ initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
};
} // end anonymous namespace
char DomViewer::ID = 0;
INITIALIZE_PASS(DomViewer, "view-dom",
- "View dominance tree of function", false, false);
+ "View dominance tree of function", false, false)
char DomOnlyViewer::ID = 0;
INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
"View dominance tree of function (with no function bodies)",
- false, false);
+ false, false)
char PostDomViewer::ID = 0;
INITIALIZE_PASS(PostDomViewer, "view-postdom",
- "View postdominance tree of function", false, false);
+ "View postdominance tree of function", false, false)
char PostDomOnlyViewer::ID = 0;
INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
"View postdominance tree of function "
"(with no function bodies)",
- false, false);
+ false, false)
namespace {
struct DomPrinter
: public DOTGraphTraitsPrinter<DominatorTree, false> {
static char ID;
- DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {}
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+ initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
struct DomOnlyPrinter
: public DOTGraphTraitsPrinter<DominatorTree, true> {
static char ID;
- DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {}
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+ initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
struct PostDomPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, false> {
static char ID;
PostDomPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+ initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
struct PostDomOnlyPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, true> {
static char ID;
PostDomOnlyPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+ initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
} // end anonymous namespace
@@ -162,24 +176,24 @@ struct PostDomOnlyPrinter
char DomPrinter::ID = 0;
INITIALIZE_PASS(DomPrinter, "dot-dom",
"Print dominance tree of function to 'dot' file",
- false, false);
+ false, false)
char DomOnlyPrinter::ID = 0;
INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
"Print dominance tree of function to 'dot' file "
"(with no function bodies)",
- false, false);
+ false, false)
char PostDomPrinter::ID = 0;
INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
"Print postdominance tree of function to 'dot' file",
- false, false);
+ false, false)
char PostDomOnlyPrinter::ID = 0;
INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
"Print postdominance tree of function to 'dot' file "
"(with no function bodies)",
- false, false);
+ false, false)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 0000000..6de4e1e
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true)
+
+namespace {
+ class DFCalculateWorkObject {
+ public:
+ DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+ const DomTreeNode *N,
+ const DomTreeNode *PN)
+ : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+ BasicBlock *currentBB;
+ BasicBlock *parentBB;
+ const DomTreeNode *Node;
+ const DomTreeNode *parentNode;
+ };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+ const DomTreeNode *Node) {
+ BasicBlock *BB = Node->getBlock();
+ DomSetType *Result = NULL;
+
+ std::vector<DFCalculateWorkObject> workList;
+ SmallPtrSet<BasicBlock *, 32> visited;
+
+ workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ do {
+ DFCalculateWorkObject *currentW = &workList.back();
+ assert (currentW && "Missing work object.");
+
+ BasicBlock *currentBB = currentW->currentBB;
+ BasicBlock *parentBB = currentW->parentBB;
+ const DomTreeNode *currentNode = currentW->Node;
+ const DomTreeNode *parentNode = currentW->parentNode;
+ assert (currentBB && "Invalid work object. Missing current Basic Block");
+ assert (currentNode && "Invalid work object. Missing current Node");
+ DomSetType &S = Frontiers[currentBB];
+
+ // Visit each block only once.
+ if (visited.count(currentBB) == 0) {
+ visited.insert(currentBB);
+
+ // Loop over CFG successors to calculate DFlocal[currentNode]
+ for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+ SI != SE; ++SI) {
+ // Does Node immediately dominate this successor?
+ if (DT[*SI]->getIDom() != currentNode)
+ S.insert(*SI);
+ }
+ }
+
+ // At this point, S is DFlocal. Now we union in DFup's of our children...
+ // Loop through and visit the nodes that Node immediately dominates (Node's
+ // children in the IDomTree)
+ bool visitChild = false;
+ for (DomTreeNode::const_iterator NI = currentNode->begin(),
+ NE = currentNode->end(); NI != NE; ++NI) {
+ DomTreeNode *IDominee = *NI;
+ BasicBlock *childBB = IDominee->getBlock();
+ if (visited.count(childBB) == 0) {
+ workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+ IDominee, currentNode));
+ visitChild = true;
+ }
+ }
+
+    // If no unvisited child was pushed (all children are already visited,
+    // or there are none), pop this block from the workList.
+ if (!visitChild) {
+
+ if (!parentBB) {
+ Result = &S;
+ break;
+ }
+
+ DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+ DomSetType &parentSet = Frontiers[parentBB];
+ for (; CDFI != CDFE; ++CDFI) {
+ if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+ parentSet.insert(*CDFI);
+ }
+ workList.pop_back();
+ }
+
+ } while (!workList.empty());
+
+ return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ OS << " DomFrontier for BB ";
+ if (I->first)
+ WriteAsOperand(OS, I->first, false);
+ else
+ OS << " <<exit node>>";
+ OS << " is:\t";
+
+ const std::set<BasicBlock*> &BBs = I->second;
+
+ for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ I != E; ++I) {
+ OS << ' ';
+ if (*I)
+ WriteAsOperand(OS, *I, false);
+ else
+ OS << "<<exit node>>";
+ }
+ OS << "\n";
+ }
+}
+
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
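To illustrate how the relocated pass is meant to be consumed (a sketch only, not part of the change), a client FunctionPass can request DominanceFrontier through the usual analysis machinery and walk the per-block frontier sets produced by calculate() above. The FrontierDumper pass and its printing behavior are hypothetical.

  #include "llvm/Analysis/DominanceFrontier.h"
  #include "llvm/Pass.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  namespace {
    // Hypothetical example pass: prints the dominance frontier of every block.
    struct FrontierDumper : public FunctionPass {
      static char ID;
      FrontierDumper() : FunctionPass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<DominanceFrontier>();
        AU.setPreservesAll();
      }
      virtual bool runOnFunction(Function &F) {
        DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
          DominanceFrontier::iterator I = DF.find(&*BB);
          if (I == DF.end())
            continue;
          errs() << BB->getName() << " frontier:";
          const std::set<BasicBlock *> &S = I->second;
          for (std::set<BasicBlock *>::const_iterator SI = S.begin(),
               SE = S.end(); SI != SE; ++SI)
            errs() << ' ' << (*SI)->getName();
          errs() << '\n';
        }
        return false;
      }
    };
  }
  char FrontierDumper::ID = 0;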
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
index b363528..690c4b4 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp
@@ -43,7 +43,9 @@ class BasicCallGraph : public ModulePass, public CallGraph {
public:
static char ID; // Class identification, replacement for typeinfo
BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {}
+ ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+ }
// runOnModule - Compute the call graph for the specified module.
virtual bool runOnModule(Module &M) {
@@ -171,9 +173,9 @@ private:
} //End anonymous namespace
-static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true);
+ "Basic CallGraph Construction", false, true, true)
char CallGraph::ID = 0;
char BasicCallGraph::ID = 0;
@@ -228,6 +230,21 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
return F;
}
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+ assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+ assert(!FunctionMap.count(To) &&
+ "Pointing CallGraphNode at a function that already exists");
+ FunctionMapTy::iterator I = FunctionMap.find(From);
+ I->second->F = const_cast<Function*>(To);
+ FunctionMap[To] = I->second;
+ FunctionMap.erase(I);
+}
+
// getOrInsertFunction - This method is identical to calling operator[], but
// it will insert a new CallGraphNode for the specified function if one does
// not already exist.
@@ -274,7 +291,6 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) {
}
}
-
// removeAnyCallEdgeTo - This method removes any call edges from this node to
// the specified callee function. This takes more time to execute than
// removeCallEdgeTo, so it should not be used unless necessary.
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
index b7a27cb..725ab72 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -582,7 +582,6 @@ namespace {
public:
static char ID;
- PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {}
PrintCallGraphPass(const std::string &B, raw_ostream &o)
: CallGraphSCCPass(ID), Banner(B), Out(o) {}
diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
index 8eed9d6..06ae34c 100644
--- a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
char FindUsedTypes::ID = 0;
INITIALIZE_PASS(FindUsedTypes, "print-used-types",
- "Find Used Types", false, true);
+ "Find Used Types", false, true)
// IncorporateType - Incorporate one type and all of its subtypes into the
// collection of used types.
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 6759b0a..116aaf4 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -88,7 +89,9 @@ namespace {
public:
static char ID;
- GlobalsModRef() : ModulePass(ID) {}
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -106,10 +109,9 @@ namespace {
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ AliasResult alias(const Location &LocA, const Location &LocB);
ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size);
+ const Location &Loc);
ModRefResult getModRefInfo(ImmutableCallSite CS1,
ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1, CS2);
@@ -119,32 +121,38 @@ namespace {
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
ModRefBehavior getModRefBehavior(const Function *F) {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
- return DoesNotAccessMemory;
+ Min = DoesNotAccessMemory;
else if ((FR->FunctionEffect & Mod) == 0)
- return OnlyReadsMemory;
+ Min = OnlyReadsMemory;
}
- return AliasAnalysis::getModRefBehavior(F);
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
- const Function* F = CS.getCalledFunction();
- if (!F) return AliasAnalysis::getModRefBehavior(CS);
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- return DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- return OnlyReadsMemory;
- }
- return AliasAnalysis::getModRefBehavior(CS);
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (const Function* F = CS.getCalledFunction())
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
+ }
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
virtual void deleteValue(Value *V);
virtual void copyValue(Value *From, Value *To);
+ virtual void addEscapingUse(Use &U);
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -177,9 +185,13 @@ namespace {
}
char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis,
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
- false, true, false);
+ false, true, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false)
Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
@@ -314,7 +326,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
continue;
// Check the value being stored.
- Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
if (isMalloc(Ptr)) {
// Okay, easy case.
@@ -476,11 +488,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
AliasAnalysis::AliasResult
-GlobalsModRef::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+GlobalsModRef::alias(const Location &LocA,
+ const Location &LocB) {
// Get the base object these pointers point to.
- const Value *UV1 = V1->getUnderlyingObject();
- const Value *UV2 = V2->getUnderlyingObject();
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
@@ -528,17 +540,18 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
if ((GV1 || GV2) && GV1 != GV2)
return NoAlias;
- return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+ return AliasAnalysis::alias(LocA, LocB);
}
AliasAnalysis::ModRefResult
GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
+ const Location &Loc) {
unsigned Known = ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+ if (const GlobalValue *GV =
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
if (GV->hasLocalLinkage())
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
@@ -547,7 +560,7 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
if (Known == NoModRef)
return NoModRef; // No need to query other mod/ref analyses
- return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size));
+ return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
}
@@ -584,3 +597,13 @@ void GlobalsModRef::deleteValue(Value *V) {
void GlobalsModRef::copyValue(Value *From, Value *To) {
AliasAnalysis::copyValue(From, To);
}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+ // For the purposes of this analysis, it is conservatively correct to treat
+ // a newly escaping value equivalently to a deleted one. We could perhaps
+ // be more precise by processing the new use and attempting to update our
+  // saved analysis results to accommodate it.
+ deleteValue(U);
+
+ AliasAnalysis::addEscapingUse(U);
+}
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 0000000..0ba2e04
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the IPA library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void llvm::initializeIPA(PassRegistry &Registry) {
+ initializeBasicCallGraphPass(Registry);
+ initializeCallGraphAnalysisGroup(Registry);
+ initializeFindUsedTypesPass(Registry);
+ initializeGlobalsModRefPass(Registry);
+}
+
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+ initializeIPA(*unwrap(R));
+}
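As a small usage sketch (hypothetical, not part of the patch), a tool that links the IPA library can force this registration up front through the C++ entry point; the registerIPAPasses helper below is invented for illustration.

  #include "llvm/InitializePasses.h"
  #include "llvm/PassRegistry.h"
  using namespace llvm;

  // Hypothetical helper: registers the IPA passes with the global registry
  // so they can be looked up by name from opt-style drivers.
  void registerIPAPasses() {
    initializeIPA(*PassRegistry::getPassRegistry());
  }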
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index cdf667a..c838218 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,7 +29,13 @@
using namespace llvm;
char IVUsers::ID = 0;
-INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true);
+INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(IVUsers, "iv-users",
+ "Induction Variable Users", false, true)
Pass *llvm::createIVUsersPass() {
return new IVUsers();
@@ -143,7 +150,8 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
}
IVUsers::IVUsers()
- : LoopPass(ID) {
+ : LoopPass(ID) {
+ initializeIVUsersPass(*PassRegistry::getPassRegistry());
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index 3e550f3..47f91cf 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -16,97 +16,8 @@
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallPtrSet.h"
-using namespace llvm;
-
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned InlineCostAnalyzer::FunctionInfo::
-CountCodeReductionForConstant(Value *V) {
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)];
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (CI->getCalledValue() == V)
- Reduction += InlineConstants::IndirectCallBonus;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (II->getCalledValue() == V)
- Reduction += InlineConstants::IndirectCallBonus;
- } else {
- // Figure out if this instruction will be removed due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
-
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
- if (AllOperandsConstant) {
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
-
- // And any other instructions that use it which become constants
- // themselves.
- Reduction += CountCodeReductionForConstant(&Inst);
- }
- }
- }
- return Reduction;
-}
-
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned InlineCostAnalyzer::FunctionInfo::
- CountCodeReductionForAlloca(Value *V) {
- if (!V->getType()->isPointerTy()) return 0; // Not a pointer
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *I = cast<Instruction>(*UI);
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- Reduction += InlineConstants::InstrCost;
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (GEP->hasAllConstantIndices())
- Reduction += CountCodeReductionForAlloca(GEP);
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Reduction += CountCodeReductionForAlloca(BCI);
- } else {
- // If there is some other strange instruction, we're not going to be able
- // to do much if we inline this.
- return 0;
- }
- }
-
- return Reduction;
-}
+using namespace llvm;
/// callIsSmall - If a call is likely to lower to a single target instruction,
/// or is otherwise deemed small return true.
@@ -160,6 +71,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
// variables as volatile if they are live across a setjmp call, and they
// probably won't do this in callers.
if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
if (F->isDeclaration() &&
(F->getName() == "setjmp" || F->getName() == "_setjmp"))
callsSetJmp = true;
@@ -226,6 +143,86 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
}
+// CountCodeReductionForConstant - Figure out an approximation for how many
+// instructions will be constant folded if the specified value is constant.
+//
+unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+ // We will be able to eliminate all but one of the successors.
+ const TerminatorInst &TI = cast<TerminatorInst>(*U);
+ const unsigned NumSucc = TI.getNumSuccessors();
+ unsigned Instrs = 0;
+ for (unsigned I = 0; I != NumSucc; ++I)
+ Instrs += NumBBInsts[TI.getSuccessor(I)];
+ // We don't know which blocks will be eliminated, so use the average size.
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+ } else {
+ // Figure out if this instruction will be removed due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
+
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant) {
+ // We will get to remove this instruction...
+ Reduction += InlineConstants::InstrCost;
+
+ // And any other instructions that use it which become constants
+ // themselves.
+ Reduction += CountCodeReductionForConstant(&Inst);
+ }
+ }
+ }
+ return Reduction;
+}
+
+// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
+// the function will be if it is inlined into a context where an argument
+// becomes an alloca.
+//
+unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
+ unsigned Reduction = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ Instruction *I = cast<Instruction>(*UI);
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ Reduction += InlineConstants::InstrCost;
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has variable indices, we won't be able to do much with it.
+ if (GEP->hasAllConstantIndices())
+ Reduction += CountCodeReductionForAlloca(GEP);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ // Track pointer through bitcasts.
+ Reduction += CountCodeReductionForAlloca(BCI);
+ } else {
+ // If there is some other strange instruction, we're not going to be able
+ // to do much if we inline this.
+ return 0;
+ }
+ }
+
+ return Reduction;
+}
+
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
void CodeMetrics::analyzeFunction(Function *F) {
@@ -245,76 +242,246 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
if (Metrics.NumRets==1)
--Metrics.NumInsts;
- // Don't bother calculating argument weights if we are never going to inline
- // the function anyway.
- if (NeverInline())
- return;
-
// Check out all of the arguments to the function, figuring out how much
// code can be eliminated if one of the arguments is a constant.
ArgumentWeights.reserve(F->arg_size());
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
- CountCodeReductionForAlloca(I)));
+ ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
+ Metrics.CountCodeReductionForAlloca(I)));
}
/// NeverInline - returns true if the function should never be inlined into
/// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline()
-{
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
return (Metrics.callsSetJmp || Metrics.isRecursive ||
Metrics.containsIndirectBr);
+}
+// getSpecializationBonus - The heuristic used to determine the per-call
+// performance boost for using a specialization of Callee with the arguments
+// listed in SpecializedArgNos replaced by constants.
+int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ if (Callee->mayBeOverridden())
+ return 0;
+
+ int Bonus = 0;
+ // If this function uses the coldcc calling convention, prefer not to
+ // specialize it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus -= InlineConstants::ColdccPenalty;
+
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+ unsigned ArgNo = 0;
+ unsigned i = 0;
+ for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+ I != E; ++I, ++ArgNo)
+ if (ArgNo == SpecializedArgNos[i]) {
+ ++i;
+ Bonus += CountBonusForConstant(I);
+ }
+
+ // Calls usually take a long time, so they make the specialization gain
+ // smaller.
+ Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ return Bonus;
}
-// getInlineCost - The heuristic used to determine if we should inline the
-// function call or not.
-//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function. We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus, providing a limit on
+// growth. Cap the bonus at 0 - we don't want declining to award a
+// devirtualization bonus to end up penalizing inlining.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+
+ // This could just be NULL.
+ if (!C) return 0;
+
+ Function *F = dyn_cast<Function>(C);
+ if (!F) return 0;
+
+ int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+ return (Bonus > 0) ? 0 : Bonus;
}
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- Function *Callee,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- Instruction *TheCall = CS.getInstruction();
- Function *Caller = TheCall->getParent()->getParent();
- bool isDirectCall = CS.getCalledFunction() == Callee;
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+ unsigned Bonus = 0;
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ User *U = *UI;
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (CI->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(CI), C);
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+ // Turning an indirect call into a direct call is a BIG win
+ if (II->getCalledValue() == V)
+ Bonus += ConstantFunctionBonus(CallSite(II), C);
+ }
+ // FIXME: Eliminating conditional branches and switches should
+ // also yield a per-call performance boost.
+ else {
+ // Figure out the bonuses that will accrue due to simple constant
+ // propagation.
+ Instruction &Inst = cast<Instruction>(*U);
- // Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline or call sites
- // marked noinline.
- if (Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ // We can't constant propagate instructions which have effects or
+ // read memory.
+ //
+ // FIXME: It would be nice to capture the fact that a load from a
+ // pointer-to-constant-global is actually a *really* good thing to zap.
+ // Unfortunately, we don't know the pointer that may get propagated here,
+ // so we can't make this decision.
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+ isa<AllocaInst>(Inst))
+ continue;
+ bool AllOperandsConstant = true;
+ for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+ AllOperandsConstant = false;
+ break;
+ }
+
+ if (AllOperandsConstant)
+ Bonus += CountBonusForConstant(&Inst);
+ }
+ }
+
+ return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
// InlineCost - This value measures how good of an inline candidate this call
// site is to inline. A lower inline cost make is more likely for the call to
// be inlined. This value may go negative.
//
int InlineCost = 0;
+ // Compute any size reductions we can expect due to arguments being passed into
+ // the function.
+ //
+ unsigned ArgNo = 0;
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI, ++ArgNo) {
+
+ // If an alloca is passed in, inlining this function is likely to allow
+ // significant future optimization possibilities (like scalar promotion, and
+ // scalarization), so encourage the inlining of the function.
+ //
+ if (isa<AllocaInst>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+ // If this is a constant being passed into the function, use the argument
+ // weights calculated for the callee to determine how much will be folded
+ // away with this information.
+ else if (isa<Constant>(I))
+ InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ }
+
+ // Each argument passed in has a cost at both the caller and the callee
+ // sides. Measurements show that each argument costs about the same as an
+ // instruction.
+ InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+
+ // Now that we have considered all of the factors that make the call site more
+ // likely to be inlined, look at factors that make us not want to inline it.
+
+ // Calls usually take a long time, so they make the inlining gain smaller.
+ InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+ // Look at the size of the callee. Each instruction counts as 5.
+ InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+
+ return InlineCost;
+}
+
+int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
+
+ bool isDirectCall = CS.getCalledFunction() == Callee;
+ Instruction *TheCall = CS.getInstruction();
+ int Bonus = 0;
+
// If there is only one call of the function, and it has internal linkage,
// make it almost guaranteed to be inlined.
//
if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
- InlineCost += InlineConstants::LastCallToStaticBonus;
-
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- InlineCost += InlineConstants::ColdccPenalty;
+ Bonus += InlineConstants::LastCallToStaticBonus;
// If the instruction after the call, or if the normal destination of the
// invoke is an unreachable instruction, the function is noreturn. As such,
// there is little point in inlining this.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- InlineCost += InlineConstants::NoreturnPenalty;
+ Bonus += InlineConstants::NoreturnPenalty;
} else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
- InlineCost += InlineConstants::NoreturnPenalty;
+ Bonus += InlineConstants::NoreturnPenalty;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (Callee->getCallingConv() == CallingConv::Cold)
+ Bonus += InlineConstants::ColdccPenalty;
+ // Add to the inline quality for properties that make the call valuable to
+ // inline. This includes factors that indicate that the result of inlining
+ // the function will be optimizable. Currently this just looks at arguments
+ // passed into the function.
+ //
+ CallSite::arg_iterator I = CS.arg_begin();
+ for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
+ FI != FE; ++I, ++FI)
+ // Compute any constant bonus due to inlining we want to give here.
+ if (isa<Constant>(I))
+ Bonus += CountBonusForConstant(FI, cast<Constant>(I));
+
+ return Bonus;
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+ Function *Callee,
+ SmallPtrSet<const Function*, 16> &NeverInline) {
+ Instruction *TheCall = CS.getInstruction();
+ Function *Caller = TheCall->getParent()->getParent();
+
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
// Get information about the callee.
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
@@ -353,46 +520,45 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
return InlineCost::getNever();
}
- // Add to the inline quality for properties that make the call valuable to
- // inline. This includes factors that indicate that the result of inlining
- // the function will be optimizable. Currently this just looks at arguments
- // passed into the function.
+ // InlineCost - This value measures how good of an inline candidate this call
+ // site is to inline. A lower inline cost makes it more likely for the call
+ // to be inlined. This value may go negative because bonuses are negative
+ // numbers.
//
- unsigned ArgNo = 0;
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I, ++ArgNo) {
- // Each argument passed in has a cost at both the caller and the callee
- // sides. Measurements show that each argument costs about the same as an
- // instruction.
- InlineCost -= InlineConstants::InstrCost;
+ int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+ return llvm::InlineCost::get(InlineCost);
+}
- // If an alloca is passed in, inlining this function is likely to allow
- // significant future optimization possibilities (like scalar promotion, and
- // scalarization), so encourage the inlining of the function.
- //
- if (isa<AllocaInst>(I)) {
- if (ArgNo < CalleeFI->ArgumentWeights.size())
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
- // If this is a constant being passed into the function, use the argument
- // weights calculated for the callee to determine how much will be folded
- // away with this information.
- } else if (isa<Constant>(I)) {
- if (ArgNo < CalleeFI->ArgumentWeights.size())
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
- }
- }
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with the arguments
+// listed in SpecializedArgNos replaced by constants.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+ SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+ // Don't specialize functions which can be redefined at link-time to mean
+ // something else.
+ if (Callee->mayBeOverridden())
+ return llvm::InlineCost::getNever();
- // Now that we have considered all of the factors that make the call site more
- // likely to be inlined, look at factors that make us not want to inline it.
+ // Get information about the callee.
+ FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+ // If we haven't calculated this information yet, do so now.
+ if (CalleeFI->Metrics.NumBlocks == 0)
+ CalleeFI->analyzeFunction(Callee);
- // Calls usually take a long time, so they make the inlining gain smaller.
- InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+ int Cost = 0;
+
+ // Look at the original size of the callee. Each instruction counts as 5.
+ Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
- // Look at the size of the callee. Each instruction counts as 5.
- InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+ // Offset that with the amount of code that can be constant-folded
+ // away with the given arguments replaced by constants.
+ for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+ ae = SpecializedArgNos.end(); an != ae; ++an)
+ Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
- return llvm::InlineCost::get(InlineCost);
+ return llvm::InlineCost::get(Cost);
}
// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
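
In short, the refactoring splits the old monolithic heuristic so that getInlineCost is the sum of a size estimate and a set of bonuses. A rough sketch of the composition (assuming the two helpers are accessible on InlineCostAnalyzer as used above; only the per-instruction weight of 5 is stated in this diff, the other weights live in InlineConstants):

    // Illustrative only: the value the inliner compares against its threshold.
    static int EstimateInlineCost(InlineCostAnalyzer &ICA, CallSite CS,
                                  Function *Callee) {
      int Size  = ICA.getInlineSize(CS, Callee);    // insts, calls, argument weights
      int Bonus = ICA.getInlineBonuses(CS, Callee); // last-static-call, coldcc, constants
      return Size + Bonus;                          // lower (possibly negative) is better
    }
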
diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp
index dcbcac0..3b385d2 100644
--- a/contrib/llvm/lib/Analysis/InstCount.cpp
+++ b/contrib/llvm/lib/Analysis/InstCount.cpp
@@ -51,7 +51,9 @@ namespace {
}
public:
static char ID; // Pass identification, replacement for typeid
- InstCount() : FunctionPass(ID) {}
+ InstCount() : FunctionPass(ID) {
+ initializeInstCountPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -65,7 +67,7 @@ namespace {
char InstCount::ID = 0;
INITIALIZE_PASS(InstCount, "instcount",
- "Counts the various types of Instructions", false, true);
+ "Counts the various types of Instructions", false, true)
FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 24cd343..a2f9862 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -8,179 +8,1267 @@
//===----------------------------------------------------------------------===//
//
// This file implements routines for folding instructions into simpler forms
-// that do not require creating new instructions. For example, this does
-// constant folding, and can handle identities like (X&0)->0.
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
+// simplified: This is usually true and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but maybe suboptimal).
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+ const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ // Arguments and constants dominate all instructions.
+ return true;
+
+ // If we have a DominatorTree then do a precise test.
+ if (DT)
+ return DT->dominates(I, P);
+
+ // Otherwise, if the instruction is in the entry block, and is not an invoke,
+ // then it obviously dominates all phi nodes.
+ if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+ !isa<InvokeInst>(I))
+ return true;
+
+ return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExpand, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Check whether the expression has the form "(A op' B) op C".
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (Op0->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op C) op' (B op C)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == B && R == A)) {
+ ++NumExpand;
+ return LHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ // Check whether the expression has the form "A op (B op' C)".
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (Op1->getOpcode() == OpcodeToExpand) {
+ // It does! Try turning it into "(A op B) op' (A op C)".
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ // They do! Return "L op' R" if it simplifies or is already available.
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+ && L == C && R == B)) {
+ ++NumExpand;
+ return RHS;
+ }
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ MaxRecurse)) {
+ ++NumExpand;
+ return V;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract. For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ unsigned OpcToExtract, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+ !Op1 || Op1->getOpcode() != OpcodeToExtract)
+ return 0;
+
+ // The expression has the form "(A op' B) op (C op' D)".
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+ // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+ Value *DD = A == C ? D : C;
+ // Form "A op' (B op DD)" if it simplifies completely.
+ // Does "B op DD" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ // It does! Return "A op' V" if it simplifies or is already available.
+ // If V equals B then "A op' V" is just the LHS. If V equals DD then
+ // "A op' V" is just the RHS.
+ if (V == B || V == DD) {
+ ++NumFactor;
+ return V == B ? LHS : RHS;
+ }
+ // Otherwise return "A op' V" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+ Value *CC = B == D ? C : D;
+    // Form "(A op CC) op' B" if it simplifies completely.
+ // Does "A op CC" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ // It does! Return "V op' B" if it simplifies or is already available.
+ // If V equals A then "V op' B" is just the LHS. If V equals CC then
+ // "V op' B" is just the RHS.
+ if (V == A || V == CC) {
+ ++NumFactor;
+ return V == A ? LHS : RHS;
+ }
+ // Otherwise return "V op' B" if it simplifies.
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ ++NumFactor;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations. Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+ assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ // It does! Return "A op V" if it simplifies or is already available.
+ // If V equals B then "A op V" is just the LHS.
+ if (V == B) return LHS;
+ // Otherwise return "A op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ // It does! Return "V op C" if it simplifies or is already available.
+ // If V equals B then "V op C" is just the RHS.
+ if (V == B) return RHS;
+ // Otherwise return "V op C" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // The remaining transforms require commutativity as well as associativity.
+ if (!Instruction::isCommutative(Opcode))
+ return 0;
+
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = RHS;
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "V op B" if it simplifies or is already available.
+ // If V equals A then "V op B" is just the LHS.
+ if (V == A) return LHS;
+ // Otherwise return "V op B" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = LHS;
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ // It does! Return "B op V" if it simplifies or is already available.
+ // If V equals C then "B op V" is just the RHS.
+ if (V == C) return RHS;
+ // Otherwise return "B op V" if it simplifies.
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ ++NumReassoc;
+ return W;
+ }
+ }
+ }
+
+ return 0;
+}
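
A concrete trace of the commutative reassociation above, written out for "(X | Y) | X" (illustrative; the Or-specific folds appear later in this file):

    // LHS = (X | Y), RHS = X, so A = X, B = Y, C = X.
    // "C op A" is "X | X", which simplifies to X; since that equals A,
    // "V op B" is just the LHS, and the whole expression simplifies to the
    // already-existing value "X | Y" without creating any new instruction.
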
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ SelectInst *SI;
+ if (isa<SelectInst>(LHS)) {
+ SI = cast<SelectInst>(LHS);
+ } else {
+ assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+ SI = cast<SelectInst>(RHS);
+ }
+
+ // Evaluate the BinOp on the true and false branches of the select.
+ Value *TV;
+ Value *FV;
+ if (SI == LHS) {
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ } else {
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ }
+
+ // If they simplified to the same value, then return the common value.
+ // If they both failed to simplify then return null.
+ if (TV == FV)
+ return TV;
+
+ // If one branch simplified to undef, return the other one.
+ if (TV && isa<UndefValue>(TV))
+ return FV;
+ if (FV && isa<UndefValue>(FV))
+ return TV;
+
+ // If applying the operation did not change the true and false select values,
+ // then the result of the binop is the select itself.
+ if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+ return SI;
+
+ // If one branch simplified and the other did not, and the simplified
+ // value is equal to the unsimplified one, return the simplified value.
+ // For example, select (cond, X, X & Z) & Z -> X & Z.
+ if ((FV && !TV) || (TV && !FV)) {
+ // Check that the simplified value has the form "X op Y" where "op" is the
+ // same as the original operation.
+ Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+ if (Simplified && Simplified->getOpcode() == Opcode) {
+ // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+ // We already know that "op" is the same as for the simplified value. See
+ // if the operands match too. If so, return the simplified value.
+ Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+ Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+ Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+ if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+ Simplified->getOperand(1) == UnsimplifiedRHS)
+ return Simplified;
+ if (Simplified->isCommutative() &&
+ Simplified->getOperand(1) == UnsimplifiedLHS &&
+ Simplified->getOperand(0) == UnsimplifiedRHS)
+ return Simplified;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value. Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const TargetData *TD,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the select is on the LHS.
+ if (!isa<SelectInst>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+ SelectInst *SI = cast<SelectInst>(LHS);
+
+ // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+ // Does "cmp TV, RHS" simplify?
+ if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! Does "cmp FV, RHS" simplify?
+ if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+ MaxRecurse)) {
+ // It does! If they simplified to the same value, then use it as the
+ // result of the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+ Value *Cond = SI->getCondition();
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ TD, DT, MaxRecurse))
+ return V;
+ }
+ }
+
+ return 0;
+}
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value. If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ PHINode *PI;
+ if (isa<PHINode>(LHS)) {
+ PI = cast<PHINode>(LHS);
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+ } else {
+ assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+ PI = cast<PHINode>(RHS);
+ // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(LHS, PI, DT))
+ return 0;
+ }
+
+ // Evaluate the BinOp on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = PI == LHS ?
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+    // than it did previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time. If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return 0;
+
+ // Make sure the phi is on the LHS.
+ if (!isa<PHINode>(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+ assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+ PHINode *PI = cast<PHINode>(LHS);
+
+ // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+ if (!ValueDominatesPHI(RHS, PI, DT))
+ return 0;
+
+  // Evaluate the comparison on the incoming phi values.
+ Value *CommonValue = 0;
+ for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PI->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PI) continue;
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ // If the operation failed to simplify, or simplified to a different value
+    // than it did previously, then give up.
+ if (!V || (CommonValue && V != CommonValue))
+ return 0;
+ CommonValue = V;
+ }
+
+ return CommonValue;
+}
+
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD) {
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
Ops, 2, TD);
}
-
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X + undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X + (Y - X) -> Y
+ // (Y - X) + X -> Y
+ // Eg: X + -X -> 0
+ Value *Y = 0;
+ if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+ match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+ return Y;
+
+ // X + ~X -> -1 since ~X = -X-1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ /// i1 add -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Add over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A + select(cond, B, C)" means evaluating
+ // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
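
Callers go through the public wrappers; a minimal sketch (illustrative, given some existing BinaryOperator *Add and possibly-null TD/DT) of folding an add in place:

    // If the add simplifies to an existing value, forward all uses to it; the
    // now-dead instruction can then be erased by the caller.
    if (Value *V = SimplifyAddInst(Add->getOperand(0), Add->getOperand(1),
                                   Add->hasNoSignedWrap(),
                                   Add->hasNoUnsignedWrap(), TD, DT))
      Add->replaceAllUsesWith(V);
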
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0))
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return UndefValue::get(Op0->getType());
+
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X - X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // (X*2) - X -> X
+ // (X<<1) - X -> X
+ Value *X = 0;
+ if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+ match(Op0, m_Shl(m_Specific(Op1), m_One())))
+ return Op1;
+
+ // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+ // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+ Value *Y = 0, *Z = Op1;
+ if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+ // See if "V === Y - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "X + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "Y + V" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+ // For example, X - (X + 1) -> -1
+ X = Op0;
+ if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Z" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ // See if "V === X - Z" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V - Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+ }
+
+ // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+ // For example, X - (X - Y) -> Y.
+ Z = Op0;
+ if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+ // See if "V === Z - X" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ // It does! Now see if "V + Y" simplifies.
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ MaxRecurse-1)) {
+ // It does, we successfully reassociated!
+ ++NumReassoc;
+ return W;
+ }
+
+ // Mul distributes over Sub. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // i1 sub -> xor.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Threading Sub over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A - select(cond, B, C)" means evaluating
+ // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
// Canonicalize the constant to the RHS.
std::swap(Op0, Op1);
}
-
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- // X + undef -> undef
- if (isa<UndefValue>(Op1C))
- return Op1C;
-
- // X + 0 --> X
- if (Op1C->isNullValue())
- return Op0;
- }
-
- // FIXME: Could pull several more out of instcombine.
+
+ // X * undef -> 0
+ if (match(Op1, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // X * 0 -> 0
+ if (match(Op1, m_Zero()))
+ return Op1;
+
+ // X * 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ // (X / Y) * Y -> X if the division is exact.
+ Value *X = 0, *Y = 0;
+ if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+ (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+ BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+ if (Div->isExact())
+ return X;
+ }
+
+ // i1 mul -> and.
+ if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Mul distributes over Add. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ bool isSigned = Opcode == Instruction::SDiv;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // undef / X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // 0 / X -> 0, we don't need to preserve faults!
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X / 1 -> X
+ if (match(Op1, m_One()))
+ return Op0;
+
+ if (Op0->getType()->isIntegerTy(1))
+ // It can't be division by zero, hence it must be division by one.
+ return Op0;
+
+ // X / X -> 1
+ if (Op0 == Op1)
+ return ConstantInt::get(Op0->getType(), 1);
+
+ // (X * Y) / Y -> X if the multiplication does not overflow.
+ Value *X = 0, *Y = 0;
+ if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+ if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+ BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ // If the Mul knows it does not overflow, then we are good to go.
+ if ((isSigned && Mul->hasNoSignedWrap()) ||
+ (!isSigned && Mul->hasNoUnsignedWrap()))
+ return X;
+ // If X has the form X = A / Y then X * Y cannot overflow.
+ if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+ if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+ return X;
+ }
+
+ // (X rem Y) / Y -> 0
+ if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Constant::getNullValue(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const DominatorTree *, unsigned) {
+ // undef / X -> undef (the undef could be a snan).
+ if (match(Op0, m_Undef()))
+ return Op0;
+
+ // X / undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+ if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { C0, C1 };
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+ }
+ }
+
+ // 0 shift by X -> 0
+ if (match(Op0, m_Zero()))
+ return Op0;
+
+ // X shift by 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // X shift by undef -> undef because it may shift by the bitwidth.
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // Shifting by the bitwidth or more is undefined.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+ if (CI->getValue().getLimitedValue() >=
+ Op0->getType()->getScalarSizeInBits())
+ return UndefValue::get(Op0->getType());
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ return 0;
+}
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef << X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X >> A) << A -> X
+ Value *X;
+ if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+ cast<PossiblyExactOperator>(Op0)->isExact())
+ return X;
+ return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
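
The "(X >> A) << A -> X" fold above is only applied when the right shift is marked exact, i.e. no set bits are shifted out; otherwise the round trip does not restore X. A small standalone C++ illustration (the constants are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 3;
      uint32_t Exact = 0xff00u;    // low A bits are zero: the lshr is "exact"
      uint32_t Inexact = 0xff07u;  // low A bits would be lost by the shift
      assert(((Exact >> A) << A) == Exact);      // round-trips; fold is valid
      assert(((Inexact >> A) << A) != Inexact);  // bits lost; fold would be wrong
      return 0;
    }
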
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // undef >>l X -> 0
+ if (match(Op0, m_Undef()))
+ return Constant::getNullValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ return X;
+
return 0;
}
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ return V;
+
+ // all ones >>a X -> all ones
+ if (match(Op0, m_AllOnes()))
+ return Op0;
+
+ // undef >>a X -> all ones
+ if (match(Op0, m_Undef()))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (X << A) >> A -> X
+ Value *X;
+ if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+ cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ return X;
+
+ return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
Ops, 2, TD);
}
-
+
// Canonicalize the constant to the RHS.
std::swap(Op0, Op1);
}
-
+
// X & undef -> 0
- if (isa<UndefValue>(Op1))
+ if (match(Op1, m_Undef()))
return Constant::getNullValue(Op0->getType());
-
+
// X & X = X
if (Op0 == Op1)
return Op0;
-
- // X & <0,0> = <0,0>
- if (isa<ConstantAggregateZero>(Op1))
+
+ // X & 0 = 0
+ if (match(Op1, m_Zero()))
return Op1;
-
- // X & <-1,-1> = X
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
- if (CP->isAllOnesValue())
- return Op0;
-
- if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
- // X & 0 = 0
- if (Op1CI->isZero())
- return Op1CI;
- // X & -1 = X
- if (Op1CI->isAllOnesValue())
- return Op0;
- }
-
+
+ // X & -1 = X
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
// A & ~A = ~A & A = 0
- Value *A, *B;
- if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
- (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
return Constant::getNullValue(Op0->getType());
-
+
// (A | ?) & A = A
+ Value *A = 0, *B = 0;
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
(A == Op1 || B == Op1))
return Op1;
-
+
// A & (A | ?) = A
if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
(A == Op0 || B == Op0))
return Op0;
-
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
return 0;
}
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
Ops, 2, TD);
}
-
+
// Canonicalize the constant to the RHS.
std::swap(Op0, Op1);
}
-
+
// X | undef -> -1
- if (isa<UndefValue>(Op1))
+ if (match(Op1, m_Undef()))
return Constant::getAllOnesValue(Op0->getType());
-
+
// X | X = X
if (Op0 == Op1)
return Op0;
- // X | <0,0> = X
- if (isa<ConstantAggregateZero>(Op1))
+ // X | 0 = X
+ if (match(Op1, m_Zero()))
return Op0;
-
- // X | <-1,-1> = <-1,-1>
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
- if (CP->isAllOnesValue())
- return Op1;
-
- if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
- // X | 0 = X
- if (Op1CI->isZero())
- return Op0;
- // X | -1 = -1
- if (Op1CI->isAllOnesValue())
- return Op1CI;
- }
-
+
+ // X | -1 = -1
+ if (match(Op1, m_AllOnes()))
+ return Op1;
+
// A | ~A = ~A | A = -1
- Value *A, *B;
- if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
- (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
-
+
// (A & ?) | A = A
+ Value *A = 0, *B = 0;
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
(A == Op1 || B == Op1))
return Op1;
-
+
// A | (A & ?) = A
if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
(A == Op0 || B == Op0))
return Op0;
-
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // Or distributes over And. Try some generic simplifications based on this.
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // And distributes over Or. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
return 0;
}
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // A ^ undef -> undef
+ if (match(Op1, m_Undef()))
+ return Op1;
+
+ // A ^ 0 = A
+ if (match(Op1, m_Zero()))
+ return Op0;
+
+ // A ^ A = 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // A ^ ~A = ~A ^ A = -1
+ if (match(Op0, m_Not(m_Specific(Op1))) ||
+ match(Op1, m_Not(m_Specific(Op0))))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // Try some generic simplifications for associative operations.
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // And distributes over Xor. Try some generic simplifications based on this.
+ if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+ TD, DT, MaxRecurse))
+ return V;
+
+ // Threading Xor over selects and phi nodes is pointless, so don't bother.
+ // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+ // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+ // only if B and C are equal. If B and C are equal then (since we assume
+ // that operands have already been simplified) "select(cond, B, C)" should
+ // have been simplified to the common value of B and C already. Analysing
+ // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
+ // for threading over phi nodes.
+
+ return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const DominatorTree *DT) {
+ return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
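
All of the rewritten entry points now thread a TargetData and a DominatorTree through to the recursive helpers. A hypothetical caller, sketched against the signatures added in this patch (the helper name trySimplify and the source of TD and DT are assumptions):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Simplify one binary operator; returns true if it was replaced.
    static bool trySimplify(BinaryOperator *BO, const TargetData *TD,
                            const DominatorTree *DT) {
      // SimplifyBinOp dispatches to SimplifyAndInst, SimplifyXorInst, etc.
      if (Value *V = SimplifyBinOp(BO->getOpcode(), BO->getOperand(0),
                                   BO->getOperand(1), TD, DT)) {
        BO->replaceAllUsesWith(V);
        BO->eraseFromParent();
        return true;
      }
      return false;
    }
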
static const Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
-
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD) {
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
-
+
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
@@ -189,70 +1277,400 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
-
- // ITy - This is the return type of the compare we're considering.
- const Type *ITy = GetCompareTy(LHS);
-
+
+ const Type *ITy = GetCompareTy(LHS); // The return type.
+ const Type *OpTy = LHS->getType(); // The operand type.
+
// icmp X, X -> true/false
// X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
// because X could be 0.
if (LHS == RHS || isa<UndefValue>(RHS))
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
- // addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) ||
- isa<ConstantPointerNull>(LHS)) &&
- (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
- isa<ConstantPointerNull>(RHS)))
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
-
- // See if we are doing a comparison with a constant.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // If we have an icmp le or icmp ge instruction, turn it into the
- // appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below.
+
+ // Special case logic when the operands have i1 type.
+ if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+ cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
switch (Pred) {
default: break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ }
+ }
+
+ // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+ // different addresses, and what's more the address of a stack variable is
+ // never null or equal to the address of a global. Note that generalizing
+ // to the case where LHS is a global variable address or null is pointless,
+ // since if both LHS and RHS are constants then we already constant folded
+ // the compare, and if only one of them is then we moved it to RHS already.
+ if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ // We already know that LHS != RHS.
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // If we are comparing with zero then try hard since this is a common case.
+ if (match(RHS, m_Zero())) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return ConstantInt::getFalse(LHS->getContext());
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getTrue(LHS->getContext());
+ case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ConstantInt::getTrue(CI->getContext());
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getFalse(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, TD))
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ if (LHSKnownNonNegative)
+ return ConstantInt::getFalse(LHS->getContext());
break;
case ICmpInst::ICMP_SLE:
- if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ConstantInt::getTrue(CI->getContext());
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getFalse(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getFalse(LHS->getContext());
+ if (LHSKnownNonNegative)
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ if (LHSKnownNegative)
+ return ConstantInt::getFalse(LHS->getContext());
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ return ConstantInt::getTrue(LHS->getContext());
+ break;
+ }
+ }
+
+ // See if we are doing a comparison with a constant integer.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_UGT:
+ if (CI->isMaxValue(false)) // A >u MAX -> FALSE
+ return ConstantInt::getFalse(CI->getContext());
break;
case ICmpInst::ICMP_UGE:
if (CI->isMinValue(false)) // A >=u MIN -> TRUE
return ConstantInt::getTrue(CI->getContext());
break;
+ case ICmpInst::ICMP_ULT:
+ if (CI->isMinValue(false)) // A <u MIN -> FALSE
+ return ConstantInt::getFalse(CI->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (CI->isMaxValue(true)) // A >s MAX -> FALSE
+ return ConstantInt::getFalse(CI->getContext());
+ break;
case ICmpInst::ICMP_SGE:
if (CI->isMinValue(true)) // A >=s MIN -> TRUE
return ConstantInt::getTrue(CI->getContext());
break;
+ case ICmpInst::ICMP_SLT:
+ if (CI->isMinValue(true)) // A <s MIN -> FALSE
+ return ConstantInt::getFalse(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ const Type *SrcTy = SrcOp->getType();
+ const Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+ TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), TD, DT,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ TD, DT, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default:
+ assert(false && "Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ TD, DT, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
}
}
-
-
+
+ // Special logic for binary operators.
+ BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+ if (MaxRecurse && (LBO || RBO)) {
+ // Analyze the case when either LHS or RHS is an add instruction.
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+ if (LBO && LBO->getOpcode() == Instruction::Add) {
+ A = LBO->getOperand(0); B = LBO->getOperand(1);
+ NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ }
+ if (RBO && RBO->getOpcode() == Instruction::Add) {
+ C = RBO->getOperand(0); D = RBO->getOperand(1);
+ NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ }
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+ Constant::getNullValue(RHS->getType()),
+ TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+ if (Value *V = SimplifyICmpInst(Pred,
+ Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ return V;
+
+ // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoLHSWrapProblem && NoRHSWrapProblem) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
return 0;
}
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
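
The zext handling above reduces a comparison of two zero-extended values to an unsigned comparison of the narrow values, and decides comparisons against a constant that does not survive the truncate/re-extend round trip without looking at X at all. A small standalone illustration of that arithmetic (the concrete numbers are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 200, Y = 13;
      uint32_t ZX = X, ZY = Y;        // zext i8 -> i32
      // icmp (zext X), (zext Y) is the unsigned compare of X and Y.
      assert((ZX < ZY) == (X < Y));
      // icmp ult (zext X), 300: 300 does not survive trunc-to-i8/re-zext
      // ((uint8_t)300 == 44), and since zext X <= 255 < 300 the compare is
      // known true independently of X.
      uint32_t C = 300;
      assert((uint8_t)C != C && ZX < C);
      return 0;
    }
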
+
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD) {
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
-
+
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
-
+
// Fold trivial predicates.
if (Pred == FCmpInst::FCMP_FALSE)
return ConstantInt::get(GetCompareTy(LHS), 0);
@@ -269,7 +1687,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (CmpInst::isFalseWhenEqual(Pred))
return ConstantInt::get(GetCompareTy(LHS), 0);
}
-
+
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// If the constant is a nan, see if we can fold the comparison based on it.
@@ -310,23 +1728,40 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
}
-
+
+ // If the comparison is with the result of a select instruction, check whether
+ // comparing with either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
+ // If the comparison is with the result of a phi instruction, check whether
+ // doing the compare with each incoming phi value yields a common result.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
return 0;
}
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
+
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
- const TargetData *TD) {
+ const TargetData *TD, const DominatorTree *) {
// select true, X, Y -> X
// select false, X, Y -> Y
if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
return CB->getZExtValue() ? TrueVal : FalseVal;
-
+
// select C, X, X -> X
if (TrueVal == FalseVal)
return TrueVal;
-
+
if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
return FalseVal;
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
@@ -336,98 +1771,249 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
return TrueVal;
return FalseVal;
}
-
-
-
+
return 0;
}
-
/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
- const TargetData *TD) {
+ const TargetData *TD, const DominatorTree *) {
+ // The type of the GEP pointer operand.
+ const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
// getelementptr P -> P.
if (NumOps == 1)
return Ops[0];
- // TODO.
- //if (isa<UndefValue>(Ops[0]))
- // return UndefValue::get(GEP.getType());
+ if (isa<UndefValue>(Ops[0])) {
+ // Compute the (pointer) type returned by the GEP instruction.
+ const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+ NumOps-1);
+ const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+ return UndefValue::get(GEPTy);
+ }
- // getelementptr P, 0 -> P.
- if (NumOps == 2)
+ if (NumOps == 2) {
+ // getelementptr P, 0 -> P.
if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
if (C->isZero())
return Ops[0];
-
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TD) {
+ const Type *Ty = PtrTy->getElementType();
+ if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ return Ops[0];
+ }
+ }
+
// Check to see if this is constant foldable.
for (unsigned i = 0; i != NumOps; ++i)
if (!isa<Constant>(Ops[i]))
return 0;
-
+
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
(Constant *const*)Ops+1, NumOps-1);
}
+/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+ // If all of the PHI's incoming values are the same then replace the PHI node
+ // with the common value.
+ Value *CommonValue = 0;
+ bool HasUndefInput = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming = PN->getIncomingValue(i);
+ // If the incoming value is the phi node itself, it can safely be skipped.
+ if (Incoming == PN) continue;
+ if (isa<UndefValue>(Incoming)) {
+ // Remember that we saw an undef value, but otherwise ignore them.
+ HasUndefInput = true;
+ continue;
+ }
+ if (CommonValue && Incoming != CommonValue)
+ return 0; // Not the same, bail out.
+ CommonValue = Incoming;
+ }
+
+ // If CommonValue is null then all of the incoming values were either undef or
+ // equal to the phi node itself.
+ if (!CommonValue)
+ return UndefValue::get(PN->getType());
+
+ // If we have a PHI node like phi(X, undef, X), where X is defined by some
+ // instruction, we cannot return X as the result of the PHI node unless it
+ // dominates the PHI block.
+ if (HasUndefInput)
+ return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+ return CommonValue;
+}
+
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD) {
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
switch (Opcode) {
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD);
- case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD);
+ case Instruction::Add:
+ return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Sub:
+ return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Shl:
+ return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+ TD, DT, MaxRecurse);
+ case Instruction::LShr:
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::AShr:
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
}
+
+ // If the operation is associative, try some generic simplifications.
+ if (Instruction::isAssociative(Opcode))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ MaxRecurse))
+ return V;
+
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ return V;
+
return 0;
}
}
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
-Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD) {
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT,
+ unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD);
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
}
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD, const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const DominatorTree *DT) {
+ Value *Result;
+
switch (I->getOpcode()) {
default:
- return ConstantFoldInstruction(I, TD);
+ Result = ConstantFoldInstruction(I, TD);
+ break;
case Instruction::Add:
- return SimplifyAddInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD);
+ Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Sub:
+ Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::Mul:
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::SDiv:
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::UDiv:
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::FDiv:
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Shl:
+ Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->hasNoSignedWrap(),
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+ TD, DT);
+ break;
+ case Instruction::LShr:
+ Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
+ case Instruction::AShr:
+ Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+ cast<BinaryOperator>(I)->isExact(),
+ TD, DT);
+ break;
case Instruction::And:
- return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
case Instruction::Or:
- return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
+ case Instruction::Xor:
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
case Instruction::ICmp:
- return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD);
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
case Instruction::FCmp:
- return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD);
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD, DT);
+ break;
case Instruction::Select:
- return SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), TD);
+ Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+ I->getOperand(2), TD, DT);
+ break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- return SimplifyGEPInst(&Ops[0], Ops.size(), TD);
+ Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+ break;
}
+ case Instruction::PHI:
+ Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ break;
}
+
+ /// If called on unreachable code, the above logic may report that the
+ /// instruction simplified to itself. Make life easier for users by
+ /// detecting that case here, returning a safe value instead.
+ return Result == I ? UndefValue::get(I->getType()) : Result;
}
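
A hypothetical driver loop using the updated SimplifyInstruction signature; the function name simplifyAll is an assumption, and TD and DT are expected to come from the enclosing pass:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Function.h"
    #include "llvm/Support/InstIterator.h"
    using namespace llvm;

    static unsigned simplifyAll(Function &F, const TargetData *TD,
                                const DominatorTree *DT) {
      unsigned NumSimplified = 0;
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ) {
        Instruction *Inst = &*I++;
        // On unreachable code SimplifyInstruction never returns Inst itself;
        // it returns undef instead, so the RAUW below is always safe.
        if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
          Inst->replaceAllUsesWith(V);
          ++NumSimplified;
        }
      }
      return NumSimplified;
    }
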
/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
@@ -437,15 +2023,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
/// simplifies and deletes scalar operations, it does not change the CFG.
///
void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
- const TargetData *TD) {
+ const TargetData *TD,
+ const DominatorTree *DT) {
assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
-
+
// FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
// we can know if it gets deleted out from under us or replaced in a
// recursive simplification.
WeakVH FromHandle(From);
WeakVH ToHandle(To);
-
+
while (!From->use_empty()) {
// Update the instruction to use the new value.
Use &TheUse = From->use_begin().getUse();
@@ -460,27 +2047,26 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
// Sanity check to make sure 'User' doesn't dangle across
// SimplifyInstruction.
AssertingVH<> UserHandle(User);
-
- SimplifiedVal = SimplifyInstruction(User, TD);
+
+ SimplifiedVal = SimplifyInstruction(User, TD, DT);
if (SimplifiedVal == 0) continue;
}
-
+
// Recursively simplify this user to the new value.
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD);
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
To = ToHandle;
-
+
assert(ToHandle && "To value deleted by recursive simplification?");
-
+
// If the recursive simplification ended up revisiting and deleting
// 'From' then we're done.
if (From == 0)
return;
}
-
+
// If 'From' has value handles referring to it, do a real RAUW to update them.
From->replaceAllUsesWith(To);
-
+
From->eraseFromParent();
}
-
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
index 1c9e148..2e259b1 100644
--- a/contrib/llvm/lib/Analysis/IntervalPartition.cpp
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
char IntervalPartition::ID = 0;
INITIALIZE_PASS(IntervalPartition, "intervals",
- "Interval Partition Construction", true, true);
+ "Interval Partition Construction", true, true)
//===----------------------------------------------------------------------===//
// IntervalPartition Implementation
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index e32dbc4..9e7da6c 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -14,8 +14,10 @@
#define DEBUG_TYPE "lazy-value-info"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
@@ -26,11 +28,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <set>
+#include <stack>
using namespace llvm;
char LazyValueInfo::ID = 0;
INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
- "Lazy Value Information Analysis", false, true);
+ "Lazy Value Information Analysis", false, true)
namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
@@ -50,18 +55,18 @@ namespace llvm {
namespace {
class LVILatticeVal {
enum LatticeValueTy {
- /// undefined - This LLVM Value has no known value yet.
+ /// undefined - This Value has no known value yet.
undefined,
- /// constant - This LLVM Value has a specific constant value.
+ /// constant - This Value has a specific constant value.
constant,
- /// notconstant - This LLVM value is known to not have the specified value.
+ /// notconstant - This Value is known to not have the specified value.
notconstant,
- /// constantrange
+ /// constantrange - The Value falls within this range.
constantrange,
- /// overdefined - This instruction is not known to be constant, and we know
+ /// overdefined - This value is not known to be constant, and we know that
/// it has a value.
overdefined
};
@@ -77,17 +82,13 @@ public:
static LVILatticeVal get(Constant *C) {
LVILatticeVal Res;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1));
- else if (!isa<UndefValue>(C))
+ if (!isa<UndefValue>(C))
Res.markConstant(C);
return Res;
}
static LVILatticeVal getNot(Constant *C) {
LVILatticeVal Res;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
- Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
- else
+ if (!isa<UndefValue>(C))
Res.markNotConstant(C);
return Res;
}
@@ -129,32 +130,34 @@ public:
/// markConstant - Return true if this is a change in status.
bool markConstant(Constant *V) {
- if (isConstant()) {
- assert(getConstant() == V && "Marking constant with different value");
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()));
+ if (isa<UndefValue>(V))
return false;
- }
-
+
+ assert((!isConstant() || getConstant() == V) &&
+ "Marking constant with different value");
assert(isUndefined());
Tag = constant;
- assert(V && "Marking constant with NULL");
Val = V;
return true;
}
/// markNotConstant - Return true if this is a change in status.
bool markNotConstant(Constant *V) {
- if (isNotConstant()) {
- assert(getNotConstant() == V && "Marking !constant with different value");
+ assert(V && "Marking constant with NULL");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (isa<UndefValue>(V))
return false;
- }
-
- if (isConstant())
- assert(getConstant() != V && "Marking not constant with different value");
- else
- assert(isUndefined());
+ assert((!isConstant() || getConstant() != V) &&
+ "Marking constant !constant with same value");
+ assert((!isNotConstant() || getNotConstant() == V) &&
+ "Marking !constant with different value");
+ assert(isUndefined() || isConstant());
Tag = notconstant;
- assert(V && "Marking constant with NULL");
Val = V;
return true;
}
@@ -185,63 +188,81 @@ public:
if (RHS.isUndefined() || isOverdefined()) return false;
if (RHS.isOverdefined()) return markOverdefined();
- if (RHS.isNotConstant()) {
- if (isNotConstant()) {
- if (getNotConstant() != RHS.getNotConstant() ||
- isa<ConstantExpr>(getNotConstant()) ||
- isa<ConstantExpr>(RHS.getNotConstant()))
- return markOverdefined();
- return false;
- } else if (isConstant()) {
- if (getConstant() == RHS.getNotConstant() ||
- isa<ConstantExpr>(RHS.getNotConstant()) ||
- isa<ConstantExpr>(getConstant()))
+ if (isUndefined()) {
+ Tag = RHS.Tag;
+ Val = RHS.Val;
+ Range = RHS.Range;
+ return true;
+ }
+
+ if (isConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
+ return false;
+ return markOverdefined();
+ }
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
return markOverdefined();
- return markNotConstant(RHS.getNotConstant());
- } else if (isConstantRange()) {
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getConstant(),
+ RHS.getNotConstant())))
+ if (Res->isOne())
+ return markNotConstant(RHS.getNotConstant());
+
return markOverdefined();
}
-
- assert(isUndefined() && "Unexpected lattice");
- return markNotConstant(RHS.getNotConstant());
+
+ // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+ // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+ // a function. The correct result is to pick up RHS.
+
+ return markOverdefined();
}
-
- if (RHS.isConstantRange()) {
- if (isConstantRange()) {
- ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
- if (NewR.isFullSet())
+
+ if (isNotConstant()) {
+ if (RHS.isConstant()) {
+ if (Val == RHS.Val)
return markOverdefined();
- else
- return markConstantRange(NewR);
- } else if (!isUndefined()) {
+
+ // Unless we can prove that the two Constants are different, we must
+ // move to overdefined.
+ // FIXME: use TargetData for smarter constant folding.
+ if (ConstantInt *Res = dyn_cast<ConstantInt>(
+ ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+ getNotConstant(),
+ RHS.getConstant())))
+ if (Res->isOne())
+ return false;
+
return markOverdefined();
}
-
- assert(isUndefined() && "Unexpected lattice");
- return markConstantRange(RHS.getConstantRange());
- }
-
- // RHS must be a constant, we must be undef, constant, or notconstant.
- assert(!isConstantRange() &&
- "Constant and ConstantRange cannot be merged.");
-
- if (isUndefined())
- return markConstant(RHS.getConstant());
-
- if (isConstant()) {
- if (getConstant() != RHS.getConstant())
+
+ if (RHS.isNotConstant()) {
+ if (Val == RHS.Val)
+ return false;
return markOverdefined();
- return false;
+ }
+
+ return markOverdefined();
}
- // If we are known "!=4" and RHS is "==5", stay at "!=4".
- if (getNotConstant() == RHS.getConstant() ||
- isa<ConstantExpr>(getNotConstant()) ||
- isa<ConstantExpr>(RHS.getConstant()))
+ assert(isConstantRange() && "New LVILattice type?");
+ if (!RHS.isConstantRange())
return markOverdefined();
- return false;
+
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ return markConstantRange(NewR);
}
-
};
} // end anonymous namespace.
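
The rewritten mergeIn above implements the usual lattice join: undefined is the identity, matching facts are kept, and any conflict collapses to overdefined. A deliberately simplified standalone model of just the constant cases (this is not LVILatticeVal itself; the range and notconstant states are omitted):

    #include <cassert>

    struct Lattice {
      enum Tag { Undefined, Constant, Overdefined } T;
      int Val;  // only meaningful when T == Constant
      static Lattice undef()         { return {Undefined, 0}; }
      static Lattice constant(int V) { return {Constant, V}; }
      // mergeIn: undefined is the identity, equal constants stay constant,
      // anything else collapses to overdefined.
      void mergeIn(const Lattice &RHS) {
        if (RHS.T == Undefined || T == Overdefined) return;
        if (T == Undefined) { *this = RHS; return; }
        if (T == Constant && RHS.T == Constant && Val == RHS.Val) return;
        T = Overdefined;
      }
    };

    int main() {
      Lattice L = Lattice::undef();
      L.mergeIn(Lattice::constant(7));   // undefined merge 7 -> 7
      assert(L.T == Lattice::Constant && L.Val == 7);
      L.mergeIn(Lattice::constant(7));   // 7 merge 7 -> 7
      assert(L.T == Lattice::Constant);
      L.mergeIn(Lattice::constant(9));   // 7 merge 9 -> overdefined
      assert(L.T == Lattice::Overdefined);
      return 0;
    }
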
@@ -267,49 +288,136 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
//===----------------------------------------------------------------------===//
namespace {
+ /// LVIValueHandle - A callback value handle that updates the cache when
+ /// values are erased.
+ class LazyValueInfoCache;
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value *V) {
+ deleted();
+ }
+ };
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LVIValueHandle> {
+ typedef DenseMapInfo<Value*> PointerInfo;
+ static inline LVIValueHandle getEmptyKey() {
+ return LVIValueHandle(PointerInfo::getEmptyKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static inline LVIValueHandle getTombstoneKey() {
+ return LVIValueHandle(PointerInfo::getTombstoneKey(),
+ static_cast<LazyValueInfoCache*>(0));
+ }
+ static unsigned getHashValue(const LVIValueHandle &Val) {
+ return PointerInfo::getHashValue(Val);
+ }
+ static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ template<>
+ struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+ typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+ typedef DenseMapInfo<Value*> BPointerInfo;
+ static inline PairTy getEmptyKey() {
+ return std::make_pair(APointerInfo::getEmptyKey(),
+ BPointerInfo::getEmptyKey());
+ }
+ static inline PairTy getTombstoneKey() {
+ return std::make_pair(APointerInfo::getTombstoneKey(),
+ BPointerInfo::getTombstoneKey());
+ }
+ static unsigned getHashValue(const PairTy &Val) {
+ return APointerInfo::getHashValue(Val.first) ^
+ BPointerInfo::getHashValue(Val.second);
+ }
+ static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+ return APointerInfo::isEqual(LHS.first, RHS.first) &&
+ BPointerInfo::isEqual(LHS.second, RHS.second);
+ }
+ };
+}
+
+namespace {
/// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
- public:
- /// BlockCacheEntryTy - This is a computed lattice value at the end of the
- /// specified basic block for a Value* that depends on context.
- typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy;
-
/// ValueCacheEntryTy - This is all of the cached block information for
/// exactly one Value*. The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
- private:
- /// LVIValueHandle - A callback value handle update the cache when
- /// values are erased.
- struct LVIValueHandle : public CallbackVH {
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+ DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+ /// BlockValueStack - This stack holds the state of the value solver
+ /// during a query. It basically emulates the callstack of the naive
+ /// recursive value lookup process.
+ std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ friend struct LVIValueHandle;
+
+ /// OverDefinedCacheUpdater - A helper object that ensures that the
+ /// OverDefinedCache is updated whenever solveBlockValue returns.
+ struct OverDefinedCacheUpdater {
LazyValueInfoCache *Parent;
+ Value *Val;
+ BasicBlock *BB;
+ LVILatticeVal &BBLV;
- LVIValueHandle(Value *V, LazyValueInfoCache *P)
- : CallbackVH(V), Parent(P) { }
+ OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+ LazyValueInfoCache *P)
+ : Parent(P), Val(V), BB(B), BBLV(LV) { }
- void deleted();
- void allUsesReplacedWith(Value* V) {
- deleted();
- }
-
- LVIValueHandle &operator=(Value *V) {
- return *this = LVIValueHandle(V, Parent);
+ bool markResult(bool changed) {
+ if (changed && BBLV.isOverdefined())
+ Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+ return changed;
}
};
+
- /// ValueCache - This is all of the cached information for all values,
- /// mapped from Value* to key information.
- std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+ bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+ LVILatticeVal &Result);
+ bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+ // These methods process one work item and may add more. A false value
+ // returned means that the work item was not completely processed and must
+ // be revisited after going through the new items.
+ bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB);
+ bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB);
+ bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI, BasicBlock *BB);
+
+ void solve();
- /// OverDefinedCache - This tracks, on a per-block basis, the set of
- /// values that are over-defined at the end of that block. This is required
- /// for cache updating.
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache;
+ ValueCacheEntryTy &lookup(Value *V) {
+ return ValueCache[LVIValueHandle(V, this)];
+ }
public:
-
/// getValueInBlock - This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
@@ -335,199 +443,112 @@ namespace {
};
} // end anonymous namespace
-//===----------------------------------------------------------------------===//
-// LVIQuery Impl
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// LVIQuery - This is a transient object that exists while a query is
- /// being performed.
- ///
- /// TODO: Reuse LVIQuery instead of recreating it for every query, this avoids
- /// reallocation of the densemap on every query.
- class LVIQuery {
- typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy;
- typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy;
-
- /// This is the current value being queried for.
- Value *Val;
-
- /// This is a pointer to the owning cache, for recursive queries.
- LazyValueInfoCache &Parent;
-
- /// This is all of the cached information about this value.
- ValueCacheEntryTy &Cache;
-
- /// This tracks, for each block, what values are overdefined.
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache;
-
- /// NewBlocks - This is a mapping of the new BasicBlocks which have been
- /// added to cache but that are not in sorted order.
- DenseSet<BasicBlock*> NewBlockInfo;
-
- public:
-
- LVIQuery(Value *V, LazyValueInfoCache &P,
- ValueCacheEntryTy &VC,
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC)
- : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) {
- }
-
- ~LVIQuery() {
- // When the query is done, insert the newly discovered facts into the
- // cache in sorted order.
- if (NewBlockInfo.empty()) return;
-
- for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(),
- E = NewBlockInfo.end(); I != E; ++I) {
- if (Cache[*I].isOverdefined())
- OverDefinedCache.insert(std::make_pair(*I, Val));
- }
- }
-
- LVILatticeVal getBlockValue(BasicBlock *BB);
- LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
-
- private:
- LVILatticeVal getCachedEntryForBlock(BasicBlock *BB);
- };
-} // end anonymous namespace
-
-void LazyValueInfoCache::LVIValueHandle::deleted() {
- for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+void LVIValueHandle::deleted() {
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator
I = Parent->OverDefinedCache.begin(),
E = Parent->OverDefinedCache.end();
- I != E; ) {
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
- ++I;
- if (tmp->second == getValPtr())
- Parent->OverDefinedCache.erase(tmp);
+ I != E; ++I) {
+ if (I->second == getValPtr())
+ ToErase.push_back(*I);
}
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ Parent->OverDefinedCache.erase(*I);
+
// This erasure deallocates *this, so it MUST happen after we're done
// using any and all members of *this.
Parent->ValueCache.erase(*this);
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
- for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
- I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) {
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
- ++I;
- if (tmp->first == BB)
- OverDefinedCache.erase(tmp);
+ SmallVector<OverDefinedPairTy, 4> ToErase;
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == BB)
+ ToErase.push_back(*I);
}
+
+ for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+ E = ToErase.end(); I != E; ++I)
+ OverDefinedCache.erase(*I);
- for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+ for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
I->second.erase(BB);
}
-/// getCachedEntryForBlock - See if we already have a value for this block. If
-/// so, return it, otherwise create a new entry in the Cache map to use.
-LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
- NewBlockInfo.insert(BB);
- return Cache[BB];
+void LazyValueInfoCache::solve() {
+ while (!BlockValueStack.empty()) {
+ std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ if (solveBlockValue(e.second, e.first))
+ BlockValueStack.pop();
+ }
+}
+
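The solve() driver above replaces the old recursive LVIQuery evaluation with an explicit stack of (block, value) work items: an item whose prerequisites are not cached yet pushes them and stays on the stack, to be retried once they have been solved. A minimal standalone sketch of that retry discipline, using toy Item/solveOne names rather than the LLVM types:

    #include <stack>
    #include <set>
    #include <utility>
    #include <cstdio>

    // Toy model of the work-list driver: each item is solved only once all of
    // its prerequisites are cached; otherwise it is left on the stack and
    // revisited after the newly pushed prerequisites have been handled.
    typedef std::pair<int, int> Item;          // stands in for (BasicBlock*, Value*)

    static std::set<Item> Cache;               // stands in for the value cache
    static std::stack<Item> Work;

    // Returns false when the item needs a prerequisite that is not cached yet,
    // mirroring solveBlockValue()/getEdgeValue() returning false.
    static bool solveOne(const Item &It) {
      if (It.second > 0) {
        Item Prereq(It.first, It.second - 1);
        if (!Cache.count(Prereq)) {
          Work.push(Prereq);                   // not ready: queue prerequisite
          return false;                        // and revisit this item later
        }
      }
      Cache.insert(It);                        // ready: record the result
      return true;
    }

    static void solve() {
      while (!Work.empty()) {
        Item Top = Work.top();
        if (solveOne(Top))
          Work.pop();                          // only pop completed items
      }
    }

    int main() {
      Work.push(Item(0, 3));
      solve();
      std::printf("cached %zu items\n", Cache.size());
      return 0;
    }

The important property is that only completed items are popped, so a partially solved query is revisited automatically after its prerequisites are in the cache.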
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (isa<Constant>(Val))
+ return true;
+
+ LVIValueHandle ValHandle(Val, this);
+ if (!ValueCache.count(ValHandle)) return false;
+ return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val))
+ return LVILatticeVal::get(VC);
+
+ return lookup(Val)[BB];
}
-LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
- // See if we already have a value for this block.
- LVILatticeVal BBLV = getCachedEntryForBlock(BB);
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+ if (isa<Constant>(Val))
+ return true;
+
+ ValueCacheEntryTy &Cache = lookup(Val);
+ LVILatticeVal &BBLV = Cache[BB];
+ // OverDefinedCacheUpdater is a helper object that will update
+ // the OverDefinedCache for us when this method exits. Make sure to
+  // call markResult on it as we exit, passing a bool to indicate if the
+  // cache needs updating, i.e. if we have solved a new value or not.
+ OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
// If we've already computed this block's value, return it.
if (!BBLV.isUndefined()) {
DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
- return BBLV;
+
+ // Since we're reusing a cached value here, we don't need to update the
+    // OverDefinedCache. The cache will have been properly updated
+ // whenever the cached value was inserted.
+ ODCacheUpdater.markResult(false);
+ return true;
}
// Otherwise, this is the first time we're seeing this block. Reset the
// lattice value to overdefined, so that cycles will terminate and be
// conservatively correct.
BBLV.markOverdefined();
- Cache[BB] = BBLV;
Instruction *BBI = dyn_cast<Instruction>(Val);
if (BBI == 0 || BBI->getParent() != BB) {
- LVILatticeVal Result; // Start Undefined.
-
- // If this is a pointer, and there's a load from that pointer in this BB,
- // then we know that the pointer can't be NULL.
- bool NotNull = false;
- if (Val->getType()->isPointerTy()) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
- LoadInst *L = dyn_cast<LoadInst>(BI);
- if (L && L->getPointerAddressSpace() == 0 &&
- L->getPointerOperand()->getUnderlyingObject() ==
- Val->getUnderlyingObject()) {
- NotNull = true;
- break;
- }
- }
- }
-
- unsigned NumPreds = 0;
- // Loop over all of our predecessors, merging what we know from them into
- // result.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- Result.mergeIn(getEdgeValue(*PI, BB));
-
- // If we hit overdefined, exit early. The BlockVals entry is already set
- // to overdefined.
- if (Result.isOverdefined()) {
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because of pred.\n");
- // If we previously determined that this is a pointer that can't be null
- // then return that rather than giving up entirely.
- if (NotNull) {
- const PointerType *PTy = cast<PointerType>(Val->getType());
- Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
- }
-
- return Result;
- }
- ++NumPreds;
- }
-
-
- // If this is the entry block, we must be asking about an argument. The
- // value is overdefined.
- if (NumPreds == 0 && BB == &BB->getParent()->front()) {
- assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
- Result.markOverdefined();
- return Result;
- }
-
- // Return the merged value, which is more precise than 'overdefined'.
- assert(!Result.isOverdefined());
- return Cache[BB] = Result;
+ return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
}
-
- // If this value is defined by an instruction in this block, we have to
- // process it here somehow or return overdefined.
+
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- LVILatticeVal Result; // Start Undefined.
-
- // Loop over all of our predecessors, merging what we know from them into
- // result.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- Value* PhiVal = PN->getIncomingValueForBlock(*PI);
- Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB));
-
- // If we hit overdefined, exit early. The BlockVals entry is already set
- // to overdefined.
- if (Result.isOverdefined()) {
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because of pred.\n");
- return Result;
- }
- }
-
- // Return the merged value, which is more precise than 'overdefined'.
- assert(!Result.isOverdefined());
- return Cache[BB] = Result;
+ return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
}
- assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?");
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+ BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ return ODCacheUpdater.markResult(true);
+ }
// We can only analyze the definitions of certain classes of instructions
// (integral binops and casts at the moment), so bail if this isn't one.
@@ -536,10 +557,10 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
!BBI->getType()->isIntegerTy()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- Result.markOverdefined();
- return Result;
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
}
-
+
// FIXME: We're currently limited to binops with a constant RHS. This should
// be improved.
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
@@ -547,34 +568,177 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- Result.markOverdefined();
- return Result;
- }
+ BBLV.markOverdefined();
+ return ODCacheUpdater.markResult(true);
+ }
+
+ return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
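Every return path of solveBlockValue() above goes through OverDefinedCacheUpdater::markResult(), which keeps the per-block overdefined set in sync with the value cache whenever a newly solved value ends up overdefined. A stripped-down sketch of that idiom with toy LatticeVal/CacheUpdater types (not the real classes):

    #include <set>
    #include <utility>
    #include <cassert>

    // Toy lattice value: only the "overdefined" bit matters for this sketch.
    struct LatticeVal {
      bool Overdefined;
      LatticeVal() : Overdefined(false) {}
    };

    typedef std::set<std::pair<int, int> > OverDefinedSet;  // (block id, value id)

    // Mirrors the updater idea: remember where the result lives and, when the
    // solver reports a newly computed value, record overdefined results in the
    // side cache so later queries and invalidation can find them quickly.
    struct CacheUpdater {
      int Block, Value;
      LatticeVal &Result;
      OverDefinedSet &OverDefined;

      CacheUpdater(int B, int V, LatticeVal &R, OverDefinedSet &OD)
        : Block(B), Value(V), Result(R), OverDefined(OD) {}

      bool markResult(bool Changed) {
        if (Changed && Result.Overdefined)
          OverDefined.insert(std::make_pair(Block, Value));
        return Changed;
      }
    };

    int main() {
      OverDefinedSet OD;
      LatticeVal BBLV;
      CacheUpdater U(/*Block=*/1, /*Value=*/42, BBLV, OD);

      BBLV.Overdefined = true;   // the solver gave up on this value
      U.markResult(true);        // new result computed -> side cache updated

      assert(OD.count(std::make_pair(1, 42)));
      return 0;
    }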
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+ return L->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(L->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+ return S->getPointerAddressSpace() == 0 &&
+ GetUnderlyingObject(S->getPointerOperand()) ==
+ GetUnderlyingObject(Ptr);
+ }
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (MI->isVolatile()) return false;
+ if (MI->getAddressSpace() != 0) return false;
+
+    // FIXME: check whether it has a value range that excludes zero?
+ ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+ if (!Len || Len->isZero()) return false;
+
+ if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+ return true;
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+ }
+ return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+ Value *Val, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ if (isa<AllocaInst>(Val)) {
+ NotNull = true;
+ } else {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+ if (InstructionDereferencesPointer(BI, Val)) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (BB == &BB->getParent()->getEntryBlock()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ } else {
+ Result.markOverdefined();
+ }
+ BBLV = Result;
+ return true;
+ }
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ BBLV = Result;
+ return true;
+}
+
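solveBlockValueNonLocal() above and solveBlockValuePHINode() below both fold the per-edge facts into one block value and stop as soon as the merge reaches overdefined, since no further predecessor can refine it. A compact sketch of that merge over a toy three-level lattice (undefined / constant / overdefined), not the real LVILatticeVal:

    #include <vector>
    #include <cstdio>

    // Toy three-level lattice: undefined < constant(c) < overdefined.
    struct Lattice {
      enum Tag { Undefined, Constant, Overdefined } T;
      int C;
      Lattice() : T(Undefined), C(0) {}

      static Lattice constant(int V) { Lattice L; L.T = Constant; L.C = V; return L; }
      static Lattice overdefined()   { Lattice L; L.T = Overdefined; return L; }

      // mergeIn: keep the most precise value consistent with both inputs.
      void mergeIn(const Lattice &O) {
        if (T == Overdefined || O.T == Undefined) return;
        if (T == Undefined) { *this = O; return; }
        if (O.T == Overdefined || C != O.C) { T = Overdefined; return; }
        // both constants and equal: nothing to do
      }
    };

    // Merge the values flowing in over each predecessor edge, bailing out early
    // once the result is overdefined (the same early exit the solver takes).
    static Lattice mergeEdges(const std::vector<Lattice> &Edges) {
      Lattice Result;                      // start undefined
      for (size_t i = 0, e = Edges.size(); i != e; ++i) {
        Result.mergeIn(Edges[i]);
        if (Result.T == Lattice::Overdefined)
          break;                           // no predecessor can improve this
      }
      return Result;
    }

    int main() {
      std::vector<Lattice> Edges;
      Edges.push_back(Lattice::constant(7));
      Edges.push_back(Lattice::constant(7));
      Lattice R = mergeEdges(Edges);
      std::printf("merged: tag=%d value=%d\n", (int)R.T, R.C);  // constant 7
      return 0;
    }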
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+ PHINode *PN, BasicBlock *BB) {
+ LVILatticeVal Result; // Start Undefined.
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ bool EdgesMissing = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PhiBB = PN->getIncomingBlock(i);
+ Value *PhiVal = PN->getIncomingValue(i);
+ LVILatticeVal EdgeResult;
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ if (EdgesMissing)
+ continue;
+
+ Result.mergeIn(EdgeResult);
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+
+ BBLV = Result;
+ return true;
+ }
+ }
+ if (EdgesMissing)
+ return false;
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+ BBLV = Result;
+ return true;
+}
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+ Instruction *BBI,
+ BasicBlock *BB) {
// Figure out the range of the LHS. If that fails, bail.
- LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
+ if (!hasBlockValue(BBI->getOperand(0), BB)) {
+ BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+ return false;
+ }
+
+ LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
if (!LHSVal.isConstantRange()) {
- Result.markOverdefined();
- return Result;
+ BBLV.markOverdefined();
+ return true;
}
- ConstantInt *RHS = 0;
ConstantRange LHSRange = LHSVal.getConstantRange();
ConstantRange RHSRange(1);
const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
if (isa<BinaryOperator>(BBI)) {
- RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
- if (!RHS) {
- Result.markOverdefined();
- return Result;
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+ RHSRange = ConstantRange(RHS->getValue());
+ } else {
+ BBLV.markOverdefined();
+ return true;
}
-
- RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
}
-
+
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
+ LVILatticeVal Result;
switch (BBI->getOpcode()) {
case Instruction::Add:
Result.markConstantRange(LHSRange.add(RHSRange));
@@ -606,6 +770,12 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
case Instruction::BitCast:
Result.markConstantRange(LHSRange);
break;
+ case Instruction::And:
+ Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+ break;
+ case Instruction::Or:
+ Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+ break;
// Unhandled instructions are overdefined.
default:
@@ -615,12 +785,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
break;
}
- return Cache[BB] = Result;
+ BBLV = Result;
+ return true;
}
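solveBlockValueConstantRange() evaluates the instruction symbolically over ranges: the LHS range comes from the already-solved operand, the RHS range is the singleton of the constant operand, and the opcode selects the ConstantRange operation (add, and, or, and so on). A deliberately simplified sketch with a non-wrapping inclusive [Lo, Hi] interval, which sidesteps the wrapped-range cases the real ConstantRange handles:

    #include <cstdio>

    // Simplified, non-wrapping interval [Lo, Hi] over int. The real
    // ConstantRange works on APInts and supports wrapped ranges; this sketch
    // ignores both.
    struct Interval {
      int Lo, Hi;
      Interval(int L, int H) : Lo(L), Hi(H) {}
      static Interval singleton(int V) { return Interval(V, V); }

      Interval add(const Interval &O) const {   // [a,b] + [c,d] = [a+c, b+d]
        return Interval(Lo + O.Lo, Hi + O.Hi);
      }
      bool contains(int V) const { return Lo <= V && V <= Hi; }
    };

    // Evaluate "x + C" given the solved range for x, the way the solver folds
    // a binop with a constant RHS.
    static Interval evalAddConst(const Interval &LHS, int C) {
      return LHS.add(Interval::singleton(C));
    }

    int main() {
      Interval X(0, 9);                         // suppose x in [0, 9]
      Interval R = evalAddConst(X, 5);          // then x + 5 in [5, 14]
      std::printf("[%d, %d], contains 14: %d\n", R.Lo, R.Hi, R.contains(14));
      return 0;
    }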
-
/// getEdgeValue - This method attempts to infer more complex
-LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+ BasicBlock *BBTo, LVILatticeVal &Result) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(Val)) {
+ Result = LVILatticeVal::get(VC);
+ return true;
+ }
+
// TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -634,9 +811,11 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
// If V is the condition of the branch itself, then we know exactly what
// it is.
- if (BI->getCondition() == Val)
- return LVILatticeVal::get(ConstantInt::get(
+ if (BI->getCondition() == Val) {
+ Result = LVILatticeVal::get(ConstantInt::get(
Type::getInt1Ty(Val->getContext()), isTrueDest));
+ return true;
+ }
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
@@ -647,30 +826,40 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
- return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
}
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
// Calculate the range of values that would satisfy the comparison.
ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
ConstantRange TrueValues =
ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
-
+
// If we're interested in the false dest, invert the condition.
if (!isTrueDest) TrueValues = TrueValues.inverse();
// Figure out the possible values of the query BEFORE this branch.
- LVILatticeVal InBlock = getBlockValue(BBFrom);
- if (!InBlock.isConstantRange())
- return LVILatticeVal::getRange(TrueValues);
-
+ if (!hasBlockValue(Val, BBFrom)) {
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
+ }
+
+ LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ if (!InBlock.isConstantRange()) {
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+
// Find all potential values that satisfy both the input and output
// conditions.
ConstantRange PossibleValues =
TrueValues.intersectWith(InBlock.getConstantRange());
-
- return LVILatticeVal::getRange(PossibleValues);
+
+ Result = LVILatticeVal::getRange(PossibleValues);
+ return true;
}
}
}
@@ -682,9 +871,8 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
if (SI->getCondition() == Val) {
// We don't know anything in the default case.
if (SI->getDefaultDest() == BBTo) {
- LVILatticeVal Result;
Result.markOverdefined();
- return Result;
+ return true;
}
// We only know something if there is exactly one value that goes from
@@ -697,51 +885,48 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
EdgeVal = SI->getCaseValue(i);
}
assert(EdgeVal && "Missing successor?");
- if (NumEdges == 1)
- return LVILatticeVal::get(EdgeVal);
+ if (NumEdges == 1) {
+ Result = LVILatticeVal::get(EdgeVal);
+ return true;
+ }
}
}
// Otherwise see if the value is known in the block.
- return getBlockValue(BBFrom);
+ if (hasBlockValue(Val, BBFrom)) {
+ Result = getBlockValue(Val, BBFrom);
+ return true;
+ }
+ BlockValueStack.push(std::make_pair(BBFrom, Val));
+ return false;
}
-
-//===----------------------------------------------------------------------===//
-// LazyValueInfoCache Impl
-//===----------------------------------------------------------------------===//
-
LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
- // If already a constant, there is nothing to compute.
- if (Constant *VC = dyn_cast<Constant>(V))
- return LVILatticeVal::get(VC);
-
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
- LVILatticeVal Result = LVIQuery(V, *this,
- ValueCache[LVIValueHandle(V, this)],
- OverDefinedCache).getBlockValue(BB);
-
+ BlockValueStack.push(std::make_pair(BB, V));
+ solve();
+ LVILatticeVal Result = getBlockValue(V, BB);
+
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
LVILatticeVal LazyValueInfoCache::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
- // If already a constant, there is nothing to compute.
- if (Constant *VC = dyn_cast<Constant>(V))
- return LVILatticeVal::get(VC);
-
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
- LVILatticeVal Result =
- LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)],
- OverDefinedCache).getEdgeValue(FromBB, ToBB);
-
+ LVILatticeVal Result;
+ if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ solve();
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ (void)WasFastQuery;
+ assert(WasFastQuery && "More work to do after problem solved?");
+ }
+
DEBUG(dbgs() << " Result = " << Result << "\n");
-
return Result;
}
@@ -761,8 +946,8 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
worklist.push_back(OldSucc);
DenseSet<Value*> ClearSet;
- for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
- I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) {
+ for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+ E = OverDefinedCache.end(); I != E; ++I) {
if (I->first == OldSucc)
ClearSet.insert(I->second);
}
@@ -779,17 +964,17 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
if (ToUpdate == NewSucc) continue;
bool changed = false;
- for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end();
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
I != E; ++I) {
// If a value was marked overdefined in OldSucc, and is here too...
- std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI =
+ DenseSet<OverDefinedPairTy>::iterator OI =
OverDefinedCache.find(std::make_pair(ToUpdate, *I));
if (OI == OverDefinedCache.end()) continue;
// Remove it from the caches.
ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
-
+
assert(CI != Entry.end() && "Couldn't find entry to update?");
Entry.erase(CI);
OverDefinedCache.erase(OI);
@@ -798,7 +983,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// blocks successors too.
changed = true;
}
-
+
if (!changed) continue;
worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
@@ -838,7 +1023,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
if (Result.isConstant())
return Result.getConstant();
- else if (Result.isConstantRange()) {
+ if (Result.isConstantRange()) {
ConstantRange CR = Result.getConstantRange();
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
@@ -854,7 +1039,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
if (Result.isConstant())
return Result.getConstant();
- else if (Result.isConstantRange()) {
+ if (Result.isConstantRange()) {
ConstantRange CR = Result.getConstantRange();
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
@@ -874,7 +1059,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Constant *Res = 0;
if (Result.isConstant()) {
Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
- if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
+ if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? False : True;
return Unknown;
}
@@ -899,13 +1084,12 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
}
// Handle more complex predicates.
- ConstantRange RHS(CI->getValue(), CI->getValue()+1);
- ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS);
- if (CR.intersectWith(TrueValues).isEmptySet())
- return False;
- else if (TrueValues.contains(CR))
+ ConstantRange TrueValues =
+ ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ if (TrueValues.contains(CR))
return True;
-
+ if (TrueValues.inverse().contains(CR))
+ return False;
return Unknown;
}
@@ -932,7 +1116,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
- BasicBlock* NewSucc) {
+ BasicBlock *NewSucc) {
if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
}
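The reworked getPredicateOnEdge() answers three ways from ranges: if the set of values satisfying the predicate contains everything the edge allows, the comparison is known true; if its complement does, it is known false; otherwise the result stays unknown. A small self-contained sketch of that decision for a signed less-than, using a simplified non-wrapping interval rather than ConstantRange:

    #include <climits>
    #include <cstdio>

    // Simplified signed, non-wrapping interval [Lo, Hi].
    struct Interval {
      int Lo, Hi;
      Interval(int L, int H) : Lo(L), Hi(H) {}
      bool contains(const Interval &O) const { return Lo <= O.Lo && O.Hi <= Hi; }
    };

    enum Tristate { False, True, Unknown };

    // Decide "x < C" when all we know is that x lies in Known. TrueValues is
    // the set satisfying the predicate and FalseValues its complement; the
    // real code gets these from ICmpInst::makeConstantRange and inverse().
    static Tristate evalSignedLess(const Interval &Known, int C) {
      Interval TrueValues(INT_MIN, C - 1);
      Interval FalseValues(C, INT_MAX);
      if (TrueValues.contains(Known))  return True;
      if (FalseValues.contains(Known)) return False;
      return Unknown;
    }

    int main() {
      std::printf("%d\n", evalSignedLess(Interval(0, 9), 10));  // True    (1)
      std::printf("%d\n", evalSignedLess(Interval(0, 9), 5));   // Unknown (2)
      std::printf("%d\n", evalSignedLess(Interval(0, 9), 0));   // False   (0)
      return 0;
    }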
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
index 7f51202..efb722b 100644
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
// Register this pass...
char LibCallAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
- "LibCall Alias Analysis", false, true, false);
+ "LibCall Alias Analysis", false, true, false)
FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
return new LibCallAliasAnalysis(LCI);
@@ -43,8 +43,8 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// vs the specified pointer/size.
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- ImmutableCallSite CS, const Value *P,
- unsigned Size) {
+ ImmutableCallSite CS,
+ const Location &Loc) {
// If we have a function, check to see what kind of mod/ref effects it
// has. Start by including any info globally known about the function.
AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
@@ -64,9 +64,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
// Find out if the pointer refers to a known location.
for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &Loc =
+ const LibCallLocationInfo &LocInfo =
LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
if (Res != LibCallLocationInfo::Yes) continue;
// If we find a match against a location that we 'do not' interact with,
@@ -85,9 +85,9 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
// Find out if the pointer refers to a known location.
bool NoneMatch = true;
for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
- const LibCallLocationInfo &Loc =
+ const LibCallLocationInfo &LocInfo =
LCI->getLocationInfo(Details[i].LocationID);
- LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+ LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
if (Res == LibCallLocationInfo::No) continue;
// If we don't know if this pointer points to the location, then we have to
@@ -118,7 +118,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
//
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
- const Value *P, unsigned Size) {
+ const Location &Loc) {
ModRefResult MRInfo = ModRef;
// If this is a direct call to a function that LCI knows about, get the
@@ -126,12 +126,12 @@ LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (LCI) {
if (const Function *F = CS.getCalledFunction()) {
if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
- MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
+ MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
if (MRInfo == NoModRef) return NoModRef;
}
}
}
  // The AliasAnalysis base class has some smarts, let's use them.
- return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, P, Size));
+ return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
}
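The mechanical change in this file (and in the files below) is that alias and mod/ref queries no longer take a bare pointer-and-size pair; they take an AliasAnalysis::Location that bundles the pointer with its access size and, elsewhere in the patch, TBAA metadata. A toy illustration of why the bundled form is easier to thread through call chains (hypothetical MemLoc/hasKnownSize names, not the LLVM API):

    #include <cstdio>

    // Toy stand-in for a memory-location descriptor: one object carries
    // everything a query needs, so adding a field (such as a type tag) does
    // not ripple through every signature that previously took a separate
    // pointer and size.
    struct MemLoc {
      const void *Ptr;
      unsigned long long Size;   // ~0ULL plays the role of "unknown size"
      const char *Tag;           // placeholder for TBAA-style type information

      MemLoc(const void *P, unsigned long long S, const char *T = 0)
        : Ptr(P), Size(S), Tag(T) {}
    };

    static const unsigned long long UnknownSize = ~0ULL;

    static bool hasKnownSize(const MemLoc &Loc) { return Loc.Size != UnknownSize; }

    int main() {
      int Buf[4] = {0, 1, 2, 3};
      MemLoc Whole(Buf, sizeof(Buf), "int");   // pointer, byte size, type tag
      MemLoc Opaque(Buf, UnknownSize);         // size not known at the query site
      std::printf("%d %d\n", hasKnownSize(Whole), hasKnownSize(Opaque));  // 1 0
      return 0;
    }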
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index a9d9724..fc7edc0 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -70,7 +70,7 @@ namespace {
void visitCallSite(CallSite CS);
void visitMemoryReference(Instruction &I, Value *Ptr,
- unsigned Size, unsigned Align,
+ uint64_t Size, unsigned Align,
const Type *Ty, unsigned Flags);
void visitCallInst(CallInst &I);
@@ -108,7 +108,9 @@ namespace {
raw_string_ostream MessagesStr;
static char ID; // Pass identification, replacement for typeid
- Lint() : FunctionPass(ID), MessagesStr(Messages) {}
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {
+ initializeLintPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -129,12 +131,6 @@ namespace {
}
}
- void WriteType(const Type *T) {
- if (!T) return;
- MessagesStr << ' ';
- WriteTypeSymbolic(MessagesStr, T, Mod);
- }
-
// CheckFailed - A check failed, so print out the condition and the message
// that failed. This provides a nice place to put a breakpoint if you want
// to see why something is not correct.
@@ -147,27 +143,16 @@ namespace {
WriteValue(V3);
WriteValue(V4);
}
-
- void CheckFailed(const Twine &Message, const Value *V1,
- const Type *T2, const Value *V3 = 0) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- }
-
- void CheckFailed(const Twine &Message, const Type *T1,
- const Type *T2 = 0, const Type *T3 = 0) {
- MessagesStr << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
- }
};
}
char Lint::ID = 0;
-INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true);
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+ false, true)
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -208,7 +193,8 @@ void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
- visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee);
+ visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Callee);
if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
Assert1(CS.getCallingConv() == F->getCallingConv(),
@@ -240,15 +226,17 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: Call argument type mismatches "
"callee parameter type", &I);
- // Check that noalias arguments don't alias other arguments. The
- // AliasAnalysis API isn't expressive enough for what we really want
- // to do. Known partial overlap is not distinguished from the case
- // where nothing is known.
+ // Check that noalias arguments don't alias other arguments. This is
+ // not fully precise because we don't know the sizes of the dereferenced
+ // memory regions.
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
- for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
- Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias,
- "Unusual: noalias argument aliases another argument", &I);
- }
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+ if (AI != BI && (*BI)->getType()->isPointerTy()) {
+ AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+ Assert1(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
// Check that an sret argument points to valid memory.
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
@@ -281,15 +269,17 @@ void Lint::visitCallSite(CallSite CS) {
case Intrinsic::memcpy: {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0,
+ visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
MemRef::Write);
- visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0,
+ visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+ MCI->getAlignment(), 0,
MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
- unsigned Size = 0;
+ uint64_t Size = 0;
if (const ConstantInt *Len =
dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
@@ -303,16 +293,19 @@ void Lint::visitCallSite(CallSite CS) {
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0,
+ visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
MemRef::Write);
- visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0,
+ visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+ MMI->getAlignment(), 0,
MemRef::Read);
break;
}
case Intrinsic::memset: {
MemSetInst *MSI = cast<MemSetInst>(&I);
// TODO: If the size is known, use it.
- visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0,
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+ MSI->getAlignment(), 0,
MemRef::Write);
break;
}
@@ -322,24 +315,26 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: va_start called in a non-varargs function",
&I);
- visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
- MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
break;
case Intrinsic::vacopy:
- visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write);
- visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read);
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read);
break;
case Intrinsic::vaend:
- visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
- MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
break;
case Intrinsic::stackrestore:
// Stackrestore doesn't read or write memory, but it sets the
// stack pointer, which the compiler may read from or write to
// at any time, so check it for both readability and writeability.
- visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
- MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+ 0, 0, MemRef::Read | MemRef::Write);
break;
}
}
@@ -368,7 +363,7 @@ void Lint::visitReturnInst(ReturnInst &I) {
// TODO: Check that the reference is in bounds.
// TODO: Check readnone/readonly function attributes.
void Lint::visitMemoryReference(Instruction &I,
- Value *Ptr, unsigned Size, unsigned Align,
+ Value *Ptr, uint64_t Size, unsigned Align,
const Type *Ty, unsigned Flags) {
// If no memory is being referenced, it doesn't matter if the pointer
// is valid.
@@ -512,12 +507,13 @@ void Lint::visitAllocaInst(AllocaInst &I) {
}
void Lint::visitVAArgInst(VAArgInst &I) {
- visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0,
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
MemRef::Read | MemRef::Write);
}
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
- visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+ MemRef::Branchee);
Assert1(I.getNumDestinations() != 0,
"Undefined behavior: indirectbr with no destinations", &I);
@@ -571,7 +567,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// TODO: Look through eliminable cast pairs.
// TODO: Look through calls with unique return values.
// TODO: Look through vector insert/extract/shuffle.
- V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts();
+ V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
if (LoadInst *L = dyn_cast<LoadInst>(V)) {
BasicBlock::iterator BBI = L;
BasicBlock *BB = L->getParent();
@@ -587,8 +583,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BBI = BB->end();
}
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- if (Value *W = PN->hasConstantValue(DT))
- return findValueImpl(W, OffsetOk, Visited);
+ if (Value *W = PN->hasConstantValue())
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
Type::getInt64Ty(V->getContext())))
@@ -620,9 +617,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, TD))
- if (W != Inst)
- return findValueImpl(W, OffsetOk, Visited);
+ if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, TD))
if (W != V)
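The tightened Lint check walks the pointer arguments pairwise and flags a noalias argument that must- or partially-aliases another argument. The pairwise shape of that check, reduced to raw address ranges (hypothetical overlaps/checkNoAliasArgs names; Lint itself asks AliasAnalysis rather than comparing addresses):

    #include <vector>
    #include <cstdio>

    // Conceptual version of the noalias-argument check: for each argument that
    // is promised not to alias, make sure no other pointer argument overlaps
    // the same byte range.
    struct Arg {
      const char *Base;
      unsigned Len;
      bool NoAlias;
    };

    static bool overlaps(const Arg &A, const Arg &B) {
      return A.Base < B.Base + B.Len && B.Base < A.Base + A.Len;
    }

    static void checkNoAliasArgs(const std::vector<Arg> &Args) {
      for (size_t i = 0; i != Args.size(); ++i) {
        if (!Args[i].NoAlias) continue;
        for (size_t j = 0; j != Args.size(); ++j)
          if (i != j && overlaps(Args[i], Args[j]))
            std::printf("Unusual: noalias argument %zu aliases argument %zu\n", i, j);
      }
    }

    int main() {
      char Buf[16];
      Arg A = { Buf, 8, true };       // declared noalias
      Arg B = { Buf + 4, 8, false };  // overlaps A's range
      std::vector<Arg> Args;
      Args.push_back(A);
      Args.push_back(B);
      checkNoAliasArgs(Args);
      return 0;
    }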
diff --git a/contrib/llvm/lib/Analysis/LiveValues.cpp b/contrib/llvm/lib/Analysis/LiveValues.cpp
index 0225f4f..a0e6034 100644
--- a/contrib/llvm/lib/Analysis/LiveValues.cpp
+++ b/contrib/llvm/lib/Analysis/LiveValues.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LiveValues.h"
+#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
@@ -22,10 +23,16 @@ namespace llvm {
}
char LiveValues::ID = 0;
-INITIALIZE_PASS(LiveValues, "live-values",
- "Value Liveness Analysis", false, true);
-
-LiveValues::LiveValues() : FunctionPass(ID) {}
+INITIALIZE_PASS_BEGIN(LiveValues, "live-values",
+ "Value Liveness Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LiveValues, "live-values",
+ "Value Liveness Analysis", false, true)
+
+LiveValues::LiveValues() : FunctionPass(ID) {
+ initializeLiveValuesPass(*PassRegistry::getPassRegistry());
+}
void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 2ba1d86..2ea27fb 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -49,7 +49,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
/// bitcasts to get back to the underlying object being addressed, keeping
/// track of the offset in bytes from the GEPs relative to the result.
-/// This is closely related to Value::getUnderlyingObject but is located
+/// This is closely related to GetUnderlyingObject but is located
/// here to avoid making VMCore depend on TargetData.
static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
uint64_t &ByteOffset,
@@ -166,7 +166,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
// If we're using alias analysis to disambiguate get the size of *Ptr.
- unsigned AccessSize = 0;
+ uint64_t AccessSize = 0;
if (AA) {
const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
AccessSize = AA->getTypeStoreSize(AccessTy);
diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
index 82c02dc..c1afe8f 100644
--- a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -27,6 +27,8 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Instructions.h"
#include "llvm/Operator.h"
#include "llvm/Support/Allocator.h"
@@ -46,8 +48,12 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() {
return new LoopDependenceAnalysis();
}
-INITIALIZE_PASS(LoopDependenceAnalysis, "lda",
- "Loop Dependence Analysis", false, true);
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true)
char LoopDependenceAnalysis::ID = 0;
//===----------------------------------------------------------------------===//
@@ -86,8 +92,8 @@ static Value *GetPointerOperand(Value *I) {
static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
const Value *A,
const Value *B) {
- const Value *aObj = A->getUnderlyingObject();
- const Value *bObj = B->getUnderlyingObject();
+ const Value *aObj = GetUnderlyingObject(A);
+ const Value *bObj = GetUnderlyingObject(B);
return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
bObj, AA->getTypeStoreSize(bObj->getType()));
}
@@ -128,7 +134,7 @@ void LoopDependenceAnalysis::getLoops(const SCEV *S,
DenseSet<const Loop*>* Loops) const {
// Refactor this into an SCEVVisitor, if efficiency becomes a concern.
for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
- if (!S->isLoopInvariant(L))
+ if (!SE->isLoopInvariant(S, L))
Loops->insert(L);
}
@@ -217,6 +223,7 @@ LoopDependenceAnalysis::analysePair(DependencePair *P) const {
switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
// We can not analyse objects if we do not know about their aliasing.
DEBUG(dbgs() << "---> [?] may alias\n");
return Unknown;
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 46219d1..0583140 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -38,7 +38,9 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
char LoopInfo::ID = 0;
-INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
//===----------------------------------------------------------------------===//
// Loop implementation
@@ -48,15 +50,18 @@ INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
///
bool Loop::isLoopInvariant(Value *V) const {
if (Instruction *I = dyn_cast<Instruction>(V))
- return isLoopInvariant(I);
+ return !contains(I);
return true; // All non-instructions are loop invariant
}
-/// isLoopInvariant - Return true if the specified instruction is
-/// loop-invariant.
-///
-bool Loop::isLoopInvariant(Instruction *I) const {
- return !contains(I);
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!isLoopInvariant(I->getOperand(i)))
+ return false;
+
+ return true;
}
/// makeLoopInvariant - If the given value is an instruction inside of the
@@ -105,6 +110,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
return false;
+
// Hoist.
I->moveBefore(InsertPt);
Changed = true;
@@ -192,7 +198,7 @@ Value *Loop::getTripCount() const {
/// getSmallConstantTripCount - Returns the trip count of this loop as a
/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// of not constant. Will also return 0 if the trip count is very large
+/// or not constant. Will also return 0 if the trip count is very large
/// (>= 2^32)
unsigned Loop::getSmallConstantTripCount() const {
Value* TripCount = this->getTripCount();
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 15d4db8..8e1a7bf 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -30,7 +30,6 @@ private:
public:
static char ID;
- PrintLoopPass() : LoopPass(ID), Out(dbgs()) {}
PrintLoopPass(const std::string &B, raw_ostream &o)
: LoopPass(ID), Banner(B), Out(o) {}
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 0000000..64d215c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,167 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+ struct MemDepPrinter : public FunctionPass {
+ const Function *F;
+
+ typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+ typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+ typedef SmallSetVector<Dep, 4> DepSet;
+ typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+ DepSetMap Deps;
+
+    static char ID; // Pass identification, replacement for typeid
+ MemDepPrinter() : FunctionPass(ID) {
+ initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnFunction(Function &F);
+
+ void print(raw_ostream &OS, const Module * = 0) const;
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<AliasAnalysis>();
+ AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ virtual void releaseMemory() {
+ Deps.clear();
+ F = 0;
+ }
+ };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+ "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+ return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+ this->F = &F;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+ // All this code uses non-const interfaces because MemDep is not
+ // const-friendly, though nothing is actually modified.
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+ continue;
+
+ MemDepResult Res = MDA.getDependency(Inst);
+ if (!Res.isNonLocal()) {
+ assert(Res.isClobber() != Res.isDef() &&
+ "Local dep should be def or clobber!");
+ Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ static_cast<BasicBlock *>(0)));
+ } else if (CallSite CS = cast<Value>(Inst)) {
+ const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+ MDA.getNonLocalCallDependency(CS);
+
+ DepSet &InstDeps = Deps[Inst];
+ for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local call dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ } else {
+ SmallVector<NonLocalDepResult, 4> NLDI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(LI);
+ MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+ LI->getParent(), NLDI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // FIXME: Volatile is not handled properly here.
+ AliasAnalysis::Location Loc = AA.getLocation(SI);
+ MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+ AliasAnalysis::Location Loc = AA.getLocation(VI);
+ MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+ } else {
+ llvm_unreachable("Unknown memory instruction!");
+ }
+
+ DepSet &InstDeps = Deps[Inst];
+ for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+ I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+ const MemDepResult &Res = I->getResult();
+ assert(Res.isClobber() != Res.isDef() &&
+ "Resolved non-local pointer dep should be def or clobber!");
+ InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+ Res.isClobber()),
+ I->getBB()));
+ }
+ }
+ }
+
+ return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+ for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+ const Instruction *Inst = &*I;
+
+ DepSetMap::const_iterator DI = Deps.find(Inst);
+ if (DI == Deps.end())
+ continue;
+
+ const DepSet &InstDeps = DI->second;
+
+ for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+ I != E; ++I) {
+ const Instruction *DepInst = I->first.getPointer();
+ bool isClobber = I->first.getInt();
+ const BasicBlock *DepBB = I->second;
+
+ OS << " " << (isClobber ? "Clobber" : " Def");
+ if (DepBB) {
+ OS << " in block ";
+ WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+ }
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ OS << "\n";
+ }
+
+ Inst->print(OS);
+ OS << "\n\n";
+ }
+}
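MemDepPrinter keys each recorded dependence by the instruction pointer plus a one-bit clobber/def flag packed into a single word via PointerIntPair. A self-contained sketch of the low-bit tagging idea behind that type (toy TaggedPtr, not the llvm::PointerIntPair implementation):

    #include <stdint.h>
    #include <cassert>
    #include <cstdio>

    // Toy pointer-plus-flag pack: stores a one-bit flag in the low bit of an
    // aligned pointer, the same space optimization PointerIntPair provides.
    template <typename T>
    class TaggedPtr {
      uintptr_t Bits;
    public:
      TaggedPtr(T *P, bool Flag) {
        uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
        assert((Raw & 1) == 0 && "pointer must be at least 2-byte aligned");
        Bits = Raw | (Flag ? 1 : 0);
      }
      T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
      bool getFlag() const { return Bits & 1; }
    };

    struct Instr { int Opcode; };

    int main() {
      Instr I = { 7 };
      TaggedPtr<Instr> Dep(&I, /*isClobber=*/true);
      std::printf("opcode=%d clobber=%d\n", Dep.getPointer()->Opcode, Dep.getFlag());
      return 0;
    }

In the printer this lets the clobber/def bit ride along with the dependence instruction without widening each entry of the per-instruction set.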
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index d18d5ce..35043bd 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -19,15 +19,18 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -46,11 +49,15 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
-INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep",
- "Memory Dependence Analysis", false, true);
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true)
MemoryDependenceAnalysis::MemoryDependenceAnalysis()
: FunctionPass(ID), PredCache(0) {
+ initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}
@@ -77,6 +84,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
@@ -92,11 +100,79 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,
InstIt = ReverseMap.find(Inst);
assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
bool Found = InstIt->second.erase(Val);
- assert(Found && "Invalid reverse map!"); Found=Found;
+ assert(Found && "Invalid reverse map!"); (void)Found;
if (InstIt->second.empty())
ReverseMap.erase(InstIt);
}
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+ AliasAnalysis::Location &Loc,
+ AliasAnalysis *AA) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ if (LI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(LI);
+ return AliasAnalysis::Ref;
+ }
+
+ if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->isVolatile()) {
+ Loc = AliasAnalysis::Location();
+ return AliasAnalysis::ModRef;
+ }
+ Loc = AA->getLocation(SI);
+ return AliasAnalysis::Mod;
+ }
+
+ if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+ Loc = AA->getLocation(V);
+ return AliasAnalysis::ModRef;
+ }
+
+ if (const CallInst *CI = isFreeCall(Inst)) {
+ // calls to free() deallocate the entire structure
+ Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+ return AliasAnalysis::Mod;
+ }
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ Loc = AliasAnalysis::Location(II->getArgOperand(1),
+ cast<ConstantInt>(II->getArgOperand(0))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ case Intrinsic::invariant_end:
+ Loc = AliasAnalysis::Location(II->getArgOperand(2),
+ cast<ConstantInt>(II->getArgOperand(1))
+ ->getZExtValue(),
+ II->getMetadata(LLVMContext::MD_tbaa));
+ // These intrinsics don't really modify the memory, but returning Mod
+ // will allow them to be handled conservatively.
+ return AliasAnalysis::Mod;
+ default:
+ break;
+ }
+
+ // Otherwise, just do the coarse-grained thing that always works.
+ if (Inst->mayWriteToMemory())
+ return AliasAnalysis::ModRef;
+ if (Inst->mayReadFromMemory())
+ return AliasAnalysis::Ref;
+ return AliasAnalysis::NoModRef;
+}
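GetLocation() classifies every instruction with the usual mod/ref bitmask, and the rewritten call-site scan below tests read-only-ness with !(MR & Mod) instead of re-querying the call site. A minimal sketch of that flag encoding and test (the enum layout follows the conventional NoModRef/Ref/Mod/ModRef scheme rather than being copied from the header):

    #include <cstdio>

    // Conventional mod/ref bitmask: the Ref bit means "may read", the Mod bit
    // means "may write"; ModRef is both.
    enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

    // Classify a toy instruction kind the way the real dispatcher classifies
    // loads, stores and va_arg.
    enum InstKind { KLoad, KStore, KVAArg, KOther };

    static ModRefResult classify(InstKind K) {
      switch (K) {
      case KLoad:  return Ref;      // a (non-volatile) load only reads
      case KStore: return Mod;      // a (non-volatile) store only writes
      case KVAArg: return ModRef;   // va_arg both reads and advances the list
      default:     return NoModRef;
      }
    }

    int main() {
      ModRefResult MR = classify(KLoad);
      // The read-only test used when scanning for call-site dependences:
      bool OnlyReads = !(MR & Mod);
      std::printf("load only reads: %d\n", OnlyReads);
      return 0;
    }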
/// getCallSiteDependencyFrom - Private helper for finding the local
/// dependencies of a call site.
@@ -108,19 +184,16 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
Instruction *Inst = --ScanIt;
// If this inst is a memory op, get the pointer it accessed
- Value *Pointer = 0;
- uint64_t PointerSize = 0;
- if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
- Pointer = S->getPointerOperand();
- PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
- } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
- Pointer = V->getOperand(0);
- PointerSize = AA->getTypeStoreSize(V->getType());
- } else if (const CallInst *CI = isFreeCall(Inst)) {
- Pointer = CI->getArgOperand(0);
- // calls to free() erase the entire structure
- PointerSize = ~0ULL;
- } else if (CallSite InstCS = cast<Value>(Inst)) {
+ AliasAnalysis::Location Loc;
+ AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+ if (Loc.Ptr) {
+ // A simple instruction.
+ if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+ return MemDepResult::getClobber(Inst);
+ continue;
+ }
+
+ if (CallSite InstCS = cast<Value>(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
@@ -128,23 +201,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
case AliasAnalysis::NoModRef:
// If the two calls are the same, return InstCS as a Def, so that
// CS can be found redundant and eliminated.
- if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+ if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
CS.getInstruction()->isIdenticalToWhenDefined(Inst))
return MemDepResult::getDef(Inst);
// Otherwise if the two calls don't interact (e.g. InstCS is readnone)
// keep scanning.
- continue;
+ break;
default:
return MemDepResult::getClobber(Inst);
}
- } else {
- // Non-memory instruction.
- continue;
}
-
- if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
- return MemDepResult::getClobber(Inst);
}
// No dependence found. If this is the entry block of the function, it is a
@@ -155,10 +222,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
}
/// getPointerDependencyFrom - Return the instruction on which a memory
-/// location depends. If isLoad is true, this routine ignore may-aliases with
-/// read-only operations.
+/// location depends. If isLoad is true, this routine ignores may-aliases with
+/// read-only operations. If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
Value *InvariantTag = 0;
@@ -175,8 +243,8 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Debug intrinsics don't cause dependences.
- if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ // Debug intrinsics don't (and can't) cause dependences.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
// If we pass an invariant-end marker, then we've just entered an
// invariant region and can start ignoring dependencies.
@@ -184,43 +252,53 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr);
- if (R == AliasAnalysis::MustAlias) {
+ AliasAnalysis::AliasResult R =
+ AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc);
+ if (R == AliasAnalysis::MustAlias)
InvariantTag = II->getArgOperand(0);
- continue;
- }
-
+
+ continue;
+ }
+
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
- } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr);
+ AliasAnalysis::AliasResult R =
+ AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc);
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(II);
+ continue;
}
}
// If we're querying on a load and we're in an invariant region, we're done
// at this point. Nothing a load depends on can live in an invariant region.
+ //
+ // FIXME: this will prevent us from returning load/load must-aliases, so GVN
+ // won't remove redundant loads.
if (isLoad && InvariantTag) continue;
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Value *Pointer = LI->getPointerOperand();
- uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
+ AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
- AliasAnalysis::AliasResult R =
- AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+ AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
if (R == AliasAnalysis::NoAlias)
continue;
// May-alias loads don't depend on each other without a dependence.
- if (isLoad && R == AliasAnalysis::MayAlias)
+ if (isLoad && R != AliasAnalysis::MustAlias)
continue;
+
+ // Stores don't alias loads from read-only memory.
+ if (!isLoad && AA->pointsToConstantMemory(LoadLoc))
+ continue;
+
// Stores depend on may and must aliased loads, loads depend on must-alias
// loads.
return MemDepResult::getDef(Inst);
@@ -234,23 +312,21 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
// the query pointer points to constant memory etc.
- if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+ if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
continue;
// Ok, this store might clobber the query pointer. Check to see if it is
// a must alias: in this case, we want to return this as a def.
- Value *Pointer = SI->getPointerOperand();
- uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+ AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
// If we found a pointer, check if it could be the same as our pointer.
- AliasAnalysis::AliasResult R =
- AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+ AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
if (R == AliasAnalysis::NoAlias)
continue;
- if (R == AliasAnalysis::MayAlias)
- return MemDepResult::getClobber(Inst);
- return MemDepResult::getDef(Inst);
+ if (R == AliasAnalysis::MustAlias)
+ return MemDepResult::getDef(Inst);
+ return MemDepResult::getClobber(Inst);
}
// If this is an allocation, and if we know that the accessed pointer is to
@@ -263,7 +339,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// need to continue scanning until the malloc call.
if (isa<AllocaInst>(Inst) ||
(isa<CallInst>(Inst) && extractMallocCall(Inst))) {
- Value *AccessPtr = MemPtr->getUnderlyingObject();
+ const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
if (AccessPtr == Inst ||
AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
@@ -272,7 +348,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
}
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
+ switch (AA->getModRefInfo(Inst, MemLoc)) {
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
@@ -322,9 +398,6 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
BasicBlock *QueryParent = QueryInst->getParent();
- Value *MemPtr = 0;
- uint64_t MemSize = 0;
-
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
// No dependence found. If this is the entry block of the function, it is a
@@ -333,65 +406,25 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
LocalCache = MemDepResult::getNonLocal();
else
LocalCache = MemDepResult::getClobber(QueryInst);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) {
- // If this is a volatile store, don't mess around with it. Just return the
- // previous instruction as a clobber.
- if (SI->isVolatile())
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
- else {
- MemPtr = SI->getPointerOperand();
- MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
- // If this is a volatile load, don't mess around with it. Just return the
- // previous instruction as a clobber.
- if (LI->isVolatile())
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
- else {
- MemPtr = LI->getPointerOperand();
- MemSize = AA->getTypeStoreSize(LI->getType());
- }
- } else if (const CallInst *CI = isFreeCall(QueryInst)) {
- MemPtr = CI->getArgOperand(0);
- // calls to free() erase the entire structure, not just a field.
- MemSize = ~0UL;
- } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
- int IntrinsicID = 0; // Intrinsic IDs start at 1.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst);
- if (II)
- IntrinsicID = II->getIntrinsicID();
-
- switch (IntrinsicID) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- MemPtr = II->getArgOperand(1);
- MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- break;
- case Intrinsic::invariant_end:
- MemPtr = II->getArgOperand(2);
- MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- break;
- default:
+ } else {
+ AliasAnalysis::Location MemLoc;
+ AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+ if (MemLoc.Ptr) {
+ // If we can do a pointer scan, make it happen.
+ bool isLoad = !(MR & AliasAnalysis::Mod);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+
+ LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+ QueryParent);
+ } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
QueryParent);
- break;
- }
- } else {
- // Non-memory instruction.
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
- }
-
- // If we need to do a pointer scan, make it happen.
- if (MemPtr) {
- bool isLoad = !QueryInst->mayWriteToMemory();
- if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
- isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
- }
- LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
- QueryParent);
+ } else
+ // Non-memory instruction.
+ LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
}
// Remember the result!
@@ -565,31 +598,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// own block.
///
void MemoryDependenceAnalysis::
-getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+ BasicBlock *FromBB,
SmallVectorImpl<NonLocalDepResult> &Result) {
- assert(Pointer->getType()->isPointerTy() &&
+ assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
- // We know that the pointer value is live into FromBB find the def/clobbers
- // from presecessors.
- const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
- uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
-
- PHITransAddr Address(Pointer, TD);
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
// translation.
DenseMap<BasicBlock*, Value*> Visited;
- if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
+ if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
Result, Visited, true))
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB,
MemDepResult::getClobber(FromBB->begin()),
- Pointer));
+ const_cast<Value *>(Loc.Ptr)));
}
/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -597,7 +626,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
@@ -631,15 +660,14 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
ScanPos = ExistingResult->getResult().getInst();
// Eliminating the dirty entry from 'Cache', so update the reverse info.
- ValueIsLoadPair CacheKey(Pointer, isLoad);
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
} else {
++NumUncacheNonLocalPtr;
}
// Scan the block for the dependency.
- MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad,
- ScanPos, BB);
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
@@ -658,7 +686,7 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
// update MemDep when we remove instructions.
Instruction *Inst = Dep.getInst();
assert(Inst && "Didn't depend on anything?");
- ValueIsLoadPair CacheKey(Pointer, isLoad);
+ ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
return Dep;
}
@@ -712,7 +740,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+ const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
@@ -720,14 +749,68 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
-
- std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
- &NonLocalPointerDeps[CacheKey];
- NonLocalDepInfo *Cache = &CacheInfo->second;
+
+ // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+ // CacheKey, this value will be inserted as the associated value. Otherwise,
+ // it'll be ignored, and we'll have to check to see if the cached size and
+ // TBAA tag are consistent with the current query.
+ NonLocalPointerInfo InitialNLPI;
+ InitialNLPI.Size = Loc.Size;
+ InitialNLPI.TBAATag = Loc.TBAATag;
+
+ // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+ // already have one.
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+ NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+ NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+ // If we already have a cache entry for this CacheKey, we may need to do some
+ // work to reconcile the cache entry and the current query.
+ if (!Pair.second) {
+ if (CacheInfo->Size < Loc.Size) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else if (CacheInfo->Size > Loc.Size) {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(Pointer,
+ Loc.getWithNewSize(CacheInfo->Size),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+
+ // If the query's TBAATag is inconsistent with the cached one,
+ // conservatively throw out the cached data and restart the query with
+ // no tag if needed.
+ if (CacheInfo->TBAATag != Loc.TBAATag) {
+ if (CacheInfo->TBAATag) {
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->TBAATag = 0;
+ for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+ DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+ if (Instruction *Inst = DI->getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ }
+ if (Loc.TBAATag)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ isLoad, StartBB, Result, Visited,
+ SkipFirstBlock);
+ }
+ }
+
+ NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
// If we have valid cached information for exactly the block we are
// investigating, just return it with no recomputation.
- if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
// We have a fully cached result for this query then we can just return the
// cached results and populate the visited set. However, we have to verify
// that we don't already have conflicting results for these blocks. Check
@@ -763,9 +846,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// than its valid cache info. If empty, the result will be valid cache info,
// otherwise it isn't.
if (Cache->empty())
- CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
else
- CacheInfo->first = BBSkipFirstBlockPair();
+ CacheInfo->Pair = BBSkipFirstBlockPair();
SmallVector<BasicBlock*, 32> Worklist;
Worklist.push_back(StartBB);
@@ -790,8 +873,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
DEBUG(AssertSorted(*Cache, NumSortedEntries));
- MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
- isLoad, BB, Cache,
+ MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
@@ -888,7 +970,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// queries. Mark this in NonLocalPointerDeps by setting the
// BBSkipFirstBlockPair pointer to null. This requires reuse of the
// cached value to do more work but not miss the phi trans failure.
- NonLocalPointerDeps[CacheKey].first = BBSkipFirstBlockPair();
+ NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+ NLPI.Pair = BBSkipFirstBlockPair();
continue;
}
@@ -899,21 +982,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// If we have a problem phi translating, fall through to the code below
// to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
+ if (getNonLocalPointerDepFromBB(PredPointer,
+ Loc.getWithNewPtr(PredPointer.getAddr()),
+ isLoad, Pred,
Result, Visited))
goto PredTranslationFailure;
}
// Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
- Cache = &CacheInfo->second;
+ Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
// Since we did phi translation, the "Cache" set won't contain all of the
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
// results from the set" Clear out the indicator for this.
- CacheInfo->first = BBSkipFirstBlockPair();
+ CacheInfo->Pair = BBSkipFirstBlockPair();
SkipFirstBlock = false;
continue;
@@ -922,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
if (Cache == 0) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
- Cache = &CacheInfo->second;
+ Cache = &CacheInfo->NonLocalDeps;
NumSortedEntries = Cache->size();
}
@@ -930,7 +1015,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// results for the query. This is ok (we can still use it to accelerate
// specific block queries) but we can't do the fastpath "return all
// results from the set". Clear out the indicator for this.
- CacheInfo->first = BBSkipFirstBlockPair();
+ CacheInfo->Pair = BBSkipFirstBlockPair();
// If *nothing* works, mark the pointer as being clobbered by the first
// instruction in this block.
@@ -972,7 +1057,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
// Remove all of the entries in the BB->val map. This involves removing
// instructions from the reverse map.
- NonLocalDepInfo &PInfo = It->second.second;
+ NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
Instruction *Target = PInfo[i].getResult().getInst();
@@ -1143,10 +1228,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
- NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
+ NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
// The cache is not valid for any specific block anymore.
- NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
+ NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
// Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
@@ -1192,7 +1277,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
E = NonLocalPointerDeps.end(); I != E; ++I) {
assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
- const NonLocalDepInfo &Val = I->second.second;
+ const NonLocalDepInfo &Val = I->second.NonLocalDeps;
for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
II != E; ++II)
assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
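Editorial note on the MemoryDependenceAnalysis hunks above: the non-local pointer cache now keys on AliasAnalysis::Location values rather than (pointer, size) pairs, so a cached entry may have been computed for a different size or TBAA tag than the incoming query. The standalone sketch below uses hypothetical stand-in types (not the LLVM classes) to illustrate the reconciliation policy those hunks implement: a wider query rebuilds the cached data, a narrower query is re-run at the cached (wider) size, and mismatched TBAA tags fall back to an untagged query.

#include <cstdint>
#include <iostream>

// Hypothetical stand-ins for AliasAnalysis::Location and NonLocalPointerInfo;
// this illustrates the policy, not the LLVM data structures.
struct Location {
  uint64_t Size;
  const void *TBAATag;   // null means "no TBAA tag"
};

struct CacheEntry {
  uint64_t Size = 0;
  const void *TBAATag = nullptr;
  bool Valid = false;    // stands in for the Pair/NonLocalDeps bookkeeping
};

// Returns the location the query should actually be run with, adjusting the
// cached entry whenever it cannot serve the new query as-is.
Location reconcile(CacheEntry &CE, Location Loc) {
  if (!CE.Valid) {                     // first query for this key
    CE = {Loc.Size, Loc.TBAATag, true};
    return Loc;
  }
  if (CE.Size < Loc.Size)              // wider query: drop cached deps, widen
    CE.Size = Loc.Size;
  else if (CE.Size > Loc.Size)         // narrower query: re-run at cached size
    Loc.Size = CE.Size;
  if (CE.TBAATag != Loc.TBAATag) {     // inconsistent tags: drop the tag
    CE.TBAATag = nullptr;
    Loc.TBAATag = nullptr;
  }
  return Loc;
}

int main() {
  CacheEntry CE;
  int Tag = 0;
  Location A = reconcile(CE, {4, &Tag});     // caches a 4-byte tagged query
  Location B = reconcile(CE, {8, nullptr});  // wider query rebuilds the cache
  Location C = reconcile(CE, {4, &Tag});     // narrower query runs at size 8, untagged
  std::cout << A.Size << ' ' << B.Size << ' ' << C.Size << '\n';  // prints 4 8 8
  return 0;
}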
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 2cc1c2a..e7e999c 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -30,7 +30,9 @@ namespace {
DebugInfoFinder Finder;
public:
static char ID; // Pass identification, replacement for typeid
- ModuleDebugInfoPrinter() : ModulePass(ID) {}
+ ModuleDebugInfoPrinter() : ModulePass(ID) {
+ initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -43,7 +45,7 @@ namespace {
char ModuleDebugInfoPrinter::ID = 0;
INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
- "Decodes module-level debug info", false, true);
+ "Decodes module-level debug info", false, true)
ModulePass *llvm::createModuleDebugInfoPrinterPass() {
return new ModuleDebugInfoPrinter();
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
new file mode 100644
index 0000000..101c2d5
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the default implementation of the Alias Analysis interface
+// that simply returns "I don't know" for all queries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+namespace {
+ /// NoAA - This class implements the -no-aa pass, which always returns "I
+ /// don't know" for alias queries. NoAA is unlike other alias analysis
+ /// implementations, in that it does not chain to a previous analysis. As
+ /// such it doesn't follow many of the rules that other alias analyses must.
+ ///
+ struct NoAA : public ImmutablePass, public AliasAnalysis {
+ static char ID; // Class identification, replacement for typeinfo
+ NoAA() : ImmutablePass(ID) {
+ initializeNoAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+ virtual void initializePass() {
+ // Note: NoAA does not call InitializeAliasAnalysis because it's
+ // special and does not support chaining.
+ TD = getAnalysisIfAvailable<TargetData>();
+ }
+
+ virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+ return MayAlias;
+ }
+
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
+ }
+
+ virtual bool pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return false;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ return ModRef;
+ }
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return ModRef;
+ }
+
+ virtual void deleteValue(Value *V) {}
+ virtual void copyValue(Value *From, Value *To) {}
+ virtual void addEscapingUse(Use &U) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char NoAA::ID = 0;
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, true)
+
+ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
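For reference, the "always MayAlias / always ModRef" answers NoAA gives are conservative rather than wrong: clients may only transform code when they get NoAlias or MustAlias, so "I don't know" merely blocks optimizations. The minimal toy client below, in plain C++ with hypothetical names (not the LLVM interfaces), makes that explicit.

#include <cassert>

enum AliasResult { NoAlias, MayAlias, MustAlias };

// Hypothetical stand-in for the behaviour of the NoAA pass above: every
// query gets the maximally conservative answer.
static AliasResult noAAAlias(const void *, const void *) { return MayAlias; }

// A toy client: it may forward a stored value to a later load only when an
// intervening store provably does not touch the load's pointer.
static bool canForwardStoreToLoad(const void *LoadPtr, const void *StorePtr,
                                  AliasResult (*AA)(const void *, const void *)) {
  return AA(LoadPtr, StorePtr) == NoAlias;   // MayAlias blocks the transform
}

int main() {
  int X = 0, Y = 0;
  // With the NoAA-style callback every query answers MayAlias, so the client
  // conservatively refuses to forward; nothing incorrect can happen.
  assert(!canForwardStoreToLoad(&X, &Y, noAAAlias));
  return 0;
}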
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index 8e4fa03..93da5a4 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -12,22 +12,27 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static bool CanPHITrans(Instruction *Inst) {
if (isa<PHINode>(Inst) ||
- isa<BitCastInst>(Inst) ||
isa<GetElementPtrInst>(Inst))
return true;
-
+
+ if (isa<CastInst>(Inst) &&
+ Inst->isSafeToSpeculativelyExecute())
+ return true;
+
if (Inst->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Inst->getOperand(1)))
return true;
-
+
// cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
// if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
// cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
@@ -50,7 +55,7 @@ static bool VerifySubExpr(Value *Expr,
// If this is a non-instruction value, there is nothing to do.
Instruction *I = dyn_cast<Instruction>(Expr);
if (I == 0) return true;
-
+
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
SmallVectorImpl<Instruction*>::iterator Entry =
@@ -59,16 +64,17 @@ static bool VerifySubExpr(Value *Expr,
InstInputs.erase(Entry);
return true;
}
-
+
// If it isn't in the InstInputs list it is a subexpr incorporated into the
// address. Sanity check that it is phi translatable.
if (!CanPHITrans(I)) {
- errs() << "Non phi translatable instruction found in PHITransAddr, either "
- "something is missing from InstInputs or CanPHITrans is wrong:\n";
+ errs() << "Non phi translatable instruction found in PHITransAddr:\n";
errs() << *I << '\n';
+ llvm_unreachable("Either something is missing from InstInputs or "
+ "CanPHITrans is wrong.");
return false;
}
-
+
// Validate the operands of the instruction.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (!VerifySubExpr(I->getOperand(i), InstInputs))
@@ -82,19 +88,20 @@ static bool VerifySubExpr(Value *Expr,
/// returns false.
bool PHITransAddr::Verify() const {
if (Addr == 0) return true;
-
- SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
-
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
if (!VerifySubExpr(Addr, Tmp))
return false;
-
+
if (!Tmp.empty()) {
- errs() << "PHITransAddr inconsistent, contains extra instructions:\n";
+ errs() << "PHITransAddr contains extra instructions:\n";
for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ llvm_unreachable("This is unexpected.");
return false;
}
-
+
// a-ok.
return true;
}
@@ -111,11 +118,11 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const {
}
-static void RemoveInstInputs(Value *V,
+static void RemoveInstInputs(Value *V,
SmallVectorImpl<Instruction*> &InstInputs) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return;
-
+
// If the instruction is in the InstInputs list, remove it.
SmallVectorImpl<Instruction*>::iterator Entry =
std::find(InstInputs.begin(), InstInputs.end(), I);
@@ -123,9 +130,9 @@ static void RemoveInstInputs(Value *V,
InstInputs.erase(Entry);
return;
}
-
+
assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
-
+
// Otherwise, it must have instruction inputs itself. Zap them recursively.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
@@ -139,7 +146,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If this is a non-instruction value, it can't require PHI translation.
Instruction *Inst = dyn_cast<Instruction>(V);
if (Inst == 0) return V;
-
+
// Determine whether 'Inst' is an input to our PHI translatable expression.
bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
@@ -156,16 +163,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// In either case, the instruction itself isn't an input any longer.
InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
-
+
// If this is a PHI, go ahead and translate it.
if (PHINode *PN = dyn_cast<PHINode>(Inst))
return AddAsInput(PN->getIncomingValueForBlock(PredBB));
-
+
// If this is a non-phi value, and it is analyzable, we can incorporate it
// into the expression by making all instruction operands be inputs.
if (!CanPHITrans(Inst))
return 0;
-
+
// All instruction operands are now inputs (and of course, they may also be
// defined in this block, so they may need to be phi translated themselves.
for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
@@ -176,31 +183,34 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// Ok, it must be an intermediate result (either because it started that way
// or because we just incorporated it into the expression). See if its
// operands need to be phi translated, and if so, reconstruct it.
-
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT);
+
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
- if (PHIIn == BC->getOperand(0))
- return BC;
-
+ if (PHIIn == Cast->getOperand(0))
+ return Cast;
+
// Find an available version of this cast.
-
+
// Constants are trivial to find.
if (Constant *C = dyn_cast<Constant>(PHIIn))
- return AddAsInput(ConstantExpr::getBitCast(C, BC->getType()));
-
- // Otherwise we have to see if a bitcasted version of the incoming pointer
+ return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+ C, Cast->getType()));
+
+ // Otherwise we have to see if a casted version of the incoming pointer
// is available. If so, we can use it, otherwise we have to fail.
for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
UI != E; ++UI) {
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
- if (BCI->getType() == BC->getType() &&
- (!DT || DT->dominates(BCI->getParent(), PredBB)))
- return BCI;
+ if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+ if (CastI->getOpcode() == Cast->getOpcode() &&
+ CastI->getType() == Cast->getType() &&
+ (!DT || DT->dominates(CastI->getParent(), PredBB)))
+ return CastI;
}
return 0;
}
-
+
// Handle getelementptr with at least one PHI translatable operand.
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
SmallVector<Value*, 8> GEPOps;
@@ -208,22 +218,22 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
if (GEPOp == 0) return 0;
-
+
AnyChanged |= GEPOp != GEP->getOperand(i);
GEPOps.push_back(GEPOp);
}
-
+
if (!AnyChanged)
return GEP;
-
+
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) {
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
-
+
return AddAsInput(V);
}
-
+
// Scan to see if we have this GEP available.
Value *APHIOp = GEPOps[0];
for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
@@ -245,7 +255,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
return 0;
}
-
+
// Handle add with a constant RHS.
if (Inst->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -253,10 +263,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
-
+
Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
if (LHS == 0) return 0;
-
+
// If the PHI translated LHS is an add of a constant, fold the immediates.
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
if (BOp->getOpcode() == Instruction::Add)
@@ -264,16 +274,16 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
LHS = BOp->getOperand(0);
RHS = ConstantExpr::getAdd(RHS, CI);
isNSW = isNUW = false;
-
+
// If the old 'LHS' was an input, add the new 'LHS' as an input.
if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
RemoveInstInputs(BOp, InstInputs);
AddAsInput(LHS);
}
}
-
+
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -283,7 +293,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If we didn't modify the add, just return it.
if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
return Inst;
-
+
// Otherwise, see if we have this add available somewhere.
for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
UI != E; ++UI) {
@@ -294,10 +304,10 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
(!DT || DT->dominates(BO->getParent(), PredBB)))
return BO;
}
-
+
return 0;
}
-
+
// Otherwise, we failed.
return 0;
}
@@ -335,13 +345,13 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
const DominatorTree &DT,
SmallVectorImpl<Instruction*> &NewInsts) {
unsigned NISize = NewInsts.size();
-
+
// Attempt to PHI translate with insertion.
Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
-
+
// If successful, return the new value.
if (Addr) return Addr;
-
+
// If not, destroy any intermediate instructions inserted.
while (NewInsts.size() != NISize)
NewInsts.pop_back_val()->eraseFromParent();
@@ -367,21 +377,23 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// If we don't have an available version of this value, it must be an
// instruction.
Instruction *Inst = cast<Instruction>(InVal);
-
- // Handle bitcast of PHI translatable value.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0),
+
+ // Handle cast of PHI translatable value.
+ if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+ if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
-
- // Otherwise insert a bitcast at the end of PredBB.
- BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
+
+ // Otherwise insert a cast at the end of PredBB.
+ CastInst *New = CastInst::Create(Cast->getOpcode(),
+ OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
NewInsts.push_back(New);
return New;
}
-
+
// Handle getelementptr with at least one PHI operand.
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
SmallVector<Value*, 8> GEPOps;
@@ -392,8 +404,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
if (OpVal == 0) return 0;
GEPOps.push_back(OpVal);
}
-
- GetElementPtrInst *Result =
+
+ GetElementPtrInst *Result =
GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
InVal->getName()+".phi.trans.insert",
PredBB->getTerminator());
@@ -401,12 +413,12 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
NewInsts.push_back(Result);
return Result;
}
-
+
#if 0
// FIXME: This code works, but it is unclear that we actually want to insert
// a big chain of computation in order to make a value available in a block.
// This needs to be evaluated carefully to consider its cost trade offs.
-
+
// Handle add with a constant RHS.
if (Inst->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -414,7 +426,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
-
+
BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
InVal->getName()+".phi.trans.insert",
PredBB->getTerminator());
@@ -424,6 +436,6 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
return Res;
}
#endif
-
+
return 0;
}
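The PHITransAddr hunks above generalize the old bitcast-only handling to any cast that is safe to speculatively execute, and they prefer reusing an identical cast, GEP, or add already available in the predecessor over materializing a new one. As a rough illustration of the PHI-translation idea itself, here is a self-contained toy, with strings standing in for IR values (hypothetical, not the LLVM API), that rewrites a "phi + offset" address in terms of each predecessor's incoming value and checks whether an equivalent expression is already available there.

#include <initializer_list>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

// Hypothetical toy expression "Base + Offset"; strings stand in for IR values.
struct Expr {
  std::string Base;
  int Offset;
  bool operator<(const Expr &O) const {
    return std::tie(Base, Offset) < std::tie(O.Base, O.Offset);
  }
};

int main() {
  // Imagine: %p = phi [%a, pred1], [%b, pred2], and the queried address %p + 8.
  std::map<std::string, std::map<std::string, std::string>> PhiIncoming =
      {{"%p", {{"pred1", "%a"}, {"pred2", "%b"}}}};

  // Expressions already available at the end of each predecessor block.
  std::map<std::string, std::map<Expr, std::string>> Available =
      {{"pred1", {{{"%a", 8}, "%a.off"}}}};   // pred1 already computes %a + 8

  Expr Addr{"%p", 8};
  for (std::string Pred : {"pred1", "pred2"}) {
    // PHI-translate: replace the PHI with its incoming value for this pred.
    Expr Translated{PhiIncoming["%p"][Pred], Addr.Offset};
    auto It = Available[Pred].find(Translated);
    if (It != Available[Pred].end())
      std::cout << Pred << ": reuse available value " << It->second << "\n";
    else
      std::cout << Pred << ": not available, would require insertion\n";
  }
  return 0;
}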
diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp
new file mode 100644
index 0000000..5d3f6bb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp
@@ -0,0 +1,525 @@
+//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Ball-Larus path numbers uniquely identify paths through a directed acyclic
+// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
+// edges to obtain a DAG, and thus the unique path numbers [Ball96].
+//
+// The purpose of this analysis is to enumerate the edges in a CFG in order
+// to obtain paths from path numbers in a convenient manner. As described in
+// [Ball96] edges can be enumerated such that given a path number by following
+// the CFG and updating the path number, the path is obtained.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ball-larus-numbering"
+
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <map>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+#include <utility>
+#include <vector>
+#include <sstream>
+
+using namespace llvm;
+
+// Are we enabling early termination?
+static cl::opt<bool> ProcessEarlyTermination(
+ "path-profile-early-termination", cl::Hidden,
+ cl::desc("In path profiling, insert extra instrumentation to account for "
+ "unexpected function termination."));
+
+// Returns the basic block for the BallLarusNode
+BasicBlock* BallLarusNode::getBlock() {
+ return(_basicBlock);
+}
+
+// Returns the number of paths to the exit starting at the node.
+unsigned BallLarusNode::getNumberPaths() {
+ return(_numberPaths);
+}
+
+// Sets the number of paths to the exit starting at the node.
+void BallLarusNode::setNumberPaths(unsigned numberPaths) {
+ _numberPaths = numberPaths;
+}
+
+// Gets the NodeColor used in graph algorithms.
+BallLarusNode::NodeColor BallLarusNode::getColor() {
+ return(_color);
+}
+
+// Sets the NodeColor used in graph algorithms.
+void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
+ _color = color;
+}
+
+// Returns an iterator over predecessor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::predBegin() {
+ return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+ return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+ return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges. Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+ return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+ return(_succEdges.end());
+}
+
+// Returns the number of successor edges. Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+ return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+ _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+ removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+ _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+ removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented. If BasicBlock
+// is null then returns "<null>". If BasicBlock has no name, then
+// "<unnamed>" is returned. Intended for use with debug output.
+std::string BallLarusNode::getName() {
+ std::stringstream name;
+
+ if(getBlock() != NULL) {
+ if(getBlock()->hasName()) {
+ std::string tempName(getBlock()->getName());
+ name << tempName.c_str() << " (" << _uid << ")";
+ } else
+ name << "<unnamed> (" << _uid << ")";
+ } else
+ name << "<null> (" << _uid << ")";
+
+ return name.str();
+}
+
+// Removes an edge from an edgeVector. Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+ // TODO: Avoid linear scan by using a set instead
+ for(BLEdgeIterator i = v.begin(),
+ end = v.end();
+ i != end;
+ ++i) {
+ if((*i) == e) {
+ v.erase(i);
+ break;
+ }
+ }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+ return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+ return(_target);
+}
+
+// Gets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+ return _edgeType;
+}
+
+// Sets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+ _edgeType = type;
+}
+
+// Returns the weight of this edge. Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+ return(_weight);
+}
+
+// Sets the weight of the edge. Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+ _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+ return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+ _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+ return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+ _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+ return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+ _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+ return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+ BLBlockNodeMap inDag;
+ std::stack<BallLarusNode*> dfsStack;
+
+ _root = addNode(&(_function.getEntryBlock()));
+ _exit = addNode(NULL);
+
+ // start search from root
+ dfsStack.push(getRoot());
+
+ // dfs to add each bb into the dag
+ while(dfsStack.size())
+ buildNode(inDag, dfsStack);
+
+ // put in the final edge
+ addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+ ++edge)
+ delete (*edge);
+
+ for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+ ++node)
+ delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+ BallLarusNode* node;
+ std::queue<BallLarusNode*> bfsQueue;
+ bfsQueue.push(getExit());
+
+ while(bfsQueue.size() > 0) {
+ node = bfsQueue.front();
+
+ DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+ bfsQueue.pop();
+ unsigned prevPathNumber = node->getNumberPaths();
+ calculatePathNumbersFrom(node);
+
+ // Check for DAG splitting
+ if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+ // Add new phony edge from the split-node to the DAG's exit
+ BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+ exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+ // Counters to handle the possibility of a multi-graph
+ BasicBlock* oldTarget = 0;
+ unsigned duplicateNumber = 0;
+
+ // Iterate through each successor edge, adding phony edges
+ for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+ if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+ // is this edge a duplicate?
+ if( oldTarget != (*succ)->getTarget()->getBlock() )
+ duplicateNumber = 0;
+
+ // create the new phony edge: root -> succ
+ BallLarusEdge* rootEdge =
+ addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+ rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+ rootEdge->setRealEdge(*succ);
+
+ // split on this edge and reference its exit/root phony edges
+ (*succ)->setType(BallLarusEdge::SPLITEDGE);
+ (*succ)->setPhonyRoot(rootEdge);
+ (*succ)->setPhonyExit(exitEdge);
+ (*succ)->setWeight(0);
+ }
+ }
+
+ calculatePathNumbersFrom(node);
+ }
+
+ DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+ << node->getNumberPaths() << ".\n");
+
+ if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+ DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+ for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+ pred != end; pred++) {
+ if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+ (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ BallLarusNode* nextNode = (*pred)->getSource();
+ // not yet visited?
+ if(nextNode->getNumberPaths() == 0)
+ bfsQueue.push(nextNode);
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+ return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+ return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+ return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+ return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+ for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+ (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its immediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+ BallLarusNode* currentNode = dfsStack.top();
+ BasicBlock* currentBlock = currentNode->getBlock();
+
+ if(currentNode->getColor() != BallLarusNode::WHITE) {
+ // we have already visited this node
+ dfsStack.pop();
+ currentNode->setColor(BallLarusNode::BLACK);
+ } else {
+ // are there any external procedure calls?
+ if( ProcessEarlyTermination ) {
+ for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+ bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+ bbCurrent++ ) {
+ Instruction& instr = *bbCurrent;
+ if( instr.getOpcode() == Instruction::Call ) {
+ BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+ callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+ break;
+ }
+ }
+ }
+
+ TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+ || isa<UnwindInst>(terminator))
+ addEdge(currentNode, getExit(),0);
+
+ currentNode->setColor(BallLarusNode::GRAY);
+ inDag[currentBlock] = currentNode;
+
+ BasicBlock* oldSuccessor = 0;
+ unsigned duplicateNumber = 0;
+
+ // iterate through this node's successors
+ for(succ_iterator successor = succ_begin(currentBlock),
+ succEnd = succ_end(currentBlock); successor != succEnd;
+ oldSuccessor = *successor, ++successor ) {
+ BasicBlock* succBB = *successor;
+
+ // is this edge a duplicate?
+ if (oldSuccessor == succBB)
+ duplicateNumber++;
+ else
+ duplicateNumber = 0;
+
+ buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+ }
+ }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+ dfsStack, BallLarusNode* currentNode,
+ BasicBlock* succBB, unsigned duplicateCount) {
+ BallLarusNode* succNode = inDag[succBB];
+
+ if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+ // visited node and forward edge
+ addEdge(currentNode, succNode, duplicateCount);
+ } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+ // visited node and back edge
+ DEBUG(dbgs() << "Backedge detected.\n");
+ addBackedge(currentNode, succNode, duplicateCount);
+ } else {
+ BallLarusNode* childNode;
+ // not visited node and forward edge
+ if(succNode) // an unvisited node that is a child of a gray node
+ childNode = succNode;
+ else { // an unvisited node that is a child of an unvisited node
+ childNode = addNode(succBB);
+ inDag[succBB] = childNode;
+ }
+ addEdge(currentNode, childNode, duplicateCount);
+ dfsStack.push(childNode);
+ }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
+ if(node == getExit())
+ // The Exit node is the base case
+ node->setNumberPaths(1);
+ else {
+ unsigned sumPaths = 0;
+ BallLarusNode* succNode;
+
+ for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+ succ != end; succ++) {
+ if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
+ (*succ)->getType() == BallLarusEdge::SPLITEDGE )
+ continue;
+
+ (*succ)->setWeight(sumPaths);
+ succNode = (*succ)->getTarget();
+
+ if( !succNode->getNumberPaths() )
+ return;
+ sumPaths += succNode->getNumberPaths();
+ }
+
+ node->setNumberPaths(sumPaths);
+ }
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will delete each pointer
+// created.
+BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
+ return( new BallLarusNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will delete each pointer
+// created.
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ return( new BallLarusEdge(source, target, duplicateCount) );
+}
+
+// Proxy to node's constructor. Updates the DAG state.
+BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
+ BallLarusNode* newNode = createNode(BB);
+ _nodes.push_back(newNode);
+ return( newNode );
+}
+
+// Proxy to edge's constructor. Updates the DAG state.
+BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
+ BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
+ _edges.push_back(newEdge);
+ source->addSuccEdge(newEdge);
+ target->addPredEdge(newEdge);
+ return(newEdge);
+}
+
+// Adds a backedge with its phony edges. Updates the DAG state.
+void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
+ unsigned duplicateCount) {
+ BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
+ childEdge->setType(BallLarusEdge::BACKEDGE);
+
+ childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
+ childEdge->setPhonyExit(addEdge(source, getExit(),0));
+
+ childEdge->getPhonyRoot()->setRealEdge(childEdge);
+ childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
+
+ childEdge->getPhonyExit()->setRealEdge(childEdge);
+ childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
+ _backEdges.push_back(childEdge);
+}
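calculatePathNumbersFrom above applies the Ball-Larus rule: the exit node counts one path, every other node's path count is the sum over its non-backedge successors, and each successor edge is weighted with the running sum of the successors processed before it, so that summing edge weights along any entry-to-exit path yields a unique number. The small standalone sketch below (independent of the BallLarus* classes) shows the rule on a diamond CFG whose two paths receive the numbers 0 and 1.

#include <cstdio>
#include <vector>

int main() {
  // A diamond DAG: 0 = entry, 1/2 = then/else, 3 = exit.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  int N = static_cast<int>(Succ.size());
  std::vector<int> NumPaths(N, 0);
  std::vector<std::vector<int>> Weight(N);

  // Process nodes in reverse topological order (here simply 3, 2, 1, 0).
  for (int v = N - 1; v >= 0; --v) {
    if (Succ[v].empty()) { NumPaths[v] = 1; continue; }   // exit node
    int Sum = 0;
    for (int w : Succ[v]) {
      Weight[v].push_back(Sum);      // increment for taking edge v -> w
      Sum += NumPaths[w];
    }
    NumPaths[v] = Sum;
  }

  std::printf("paths through the DAG: %d\n", NumPaths[0]);      // 2
  std::printf("edge 0->1 weight %d, edge 0->2 weight %d\n",     // 0 and 1
              Weight[0][0], Weight[0][1]);
  return 0;
}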
diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 0000000..b361d3f
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+ class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+ public:
+ PathProfileLoaderPass() : ModulePass(ID) { }
+ ~PathProfileLoaderPass();
+
+ // this pass doesn't change anything (only loads information)
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // the full name of the loader pass
+ virtual const char* getPassName() const {
+ return "Path Profiling Information Loader";
+ }
+
+ // required since this pass implements multiple inheritance
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ // entry point to run the pass
+ bool runOnModule(Module &M);
+
+ // pass identification
+ static char ID;
+
+ private:
+ // make a reference table to refer to function by number
+ void buildFunctionRefs(Module &M);
+
+ // process argument info of a program from the input file
+ void handleArgumentInfo();
+
+ // process path number information from the input file
+ void handlePathInfo();
+
+ // array of references to the functions in the module
+ std::vector<Function*> _functions;
+
+ // path profile file handle
+ FILE* _file;
+
+ // path profile file name
+ std::string _filename;
+ };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+ NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+ "path-profile-loader",
+ "Load path profile information from file",
+ false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+ return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+ unsigned duplicateNumber)
+ : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+ double countStdDev, PathProfileInfo* ppi)
+ : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+ return 100 * double(_count) /
+ double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+ unsigned int pathNumber) {
+ BallLarusEdge* best = 0;
+
+ for( BLEdgeIterator next = node->succBegin(),
+ end = node->succEnd(); next != end; next++ ) {
+ if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+ (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+ (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+ (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+ best = *next;
+ }
+
+ return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
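+  // Decode the path number by walking the Ball-Larus DAG from the root,
+  // following the heaviest admissible edge and subtracting its weight from
+  // the remaining increment until the exit is reached.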
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+ increment -= next->getWeight();
+
+ if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+ next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getTarget() != _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getSource()->getBlock(),
+ next->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+ next->getTarget() == _ppi->_currentDag->getExit() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+ next->getSource() == _ppi->_currentDag->getRoot() )
+ pev->push_back(ProfilePathEdge(
+ next->getRealEdge()->getSource()->getBlock(),
+ next->getRealEdge()->getTarget()->getBlock(),
+ next->getDuplicateNumber()));
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+ BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+ unsigned int increment = _number;
+ ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
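+  // Walk the DAG exactly as in getPathEdges, but collect the basic blocks
+  // visited along the path instead of the edges.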
+ while (currentNode != _ppi->_currentDag->getExit()) {
+ BallLarusEdge* next = getNextEdge(currentNode, increment);
+ increment -= next->getWeight();
+
+ // add block to the block list if it is a real edge
+ if( next->getType() == BallLarusEdge::NORMAL)
+ pbv->push_back (currentNode->getBlock());
+ // make the back edge the last edge since we are at the end
+ else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+ pbv->push_back (currentNode->getBlock());
+ pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+ }
+
+ // set the new node
+ currentNode = next->getTarget();
+ }
+
+ return pbv;
+}
+
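+// The path normally begins at the DAG root; if the first edge taken is a
+// phony back edge or phony split edge, the real first block is that edge's
+// target instead.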
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+ BallLarusNode* root = _ppi->_currentDag->getRoot();
+ BallLarusEdge* edge = getNextEdge(root, _number);
+
+ if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+ return edge->getTarget()->getBlock();
+
+ return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+ if (_currentDag)
+ delete _currentDag;
+}
+
+// set the function for which paths are currently being processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+ // Make sure it exists
+ if (!F) return;
+
+ if (_currentDag)
+ delete _currentDag;
+
+ _currentFunction = F;
+ _currentDag = new BallLarusDag(*F);
+ _currentDag->init();
+ _currentDag->calculatePathNumbers();
+}
+
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+ return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+ return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+ return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+ return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+ return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+ return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+ return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+ for( FunctionPathIterator funcNext = _functionPaths.begin(),
+ funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+ for( ProfilePathIterator pathNext = funcNext->second.begin(),
+ pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+ delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+ // get the filename and setup the module's function references
+ _filename = PathProfileInfoFilename;
+ buildFunctionRefs (M);
+
+ if (!(_file = fopen(_filename.c_str(), "rb"))) {
+ errs () << "error: input '" << _filename << "' file does not exist.\n";
+ return false;
+ }
+
+ ProfilingType profType;
+
+ while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+ switch (profType) {
+ case ArgumentInfo:
+ handleArgumentInfo ();
+ break;
+ case PathInfo:
+ handlePathInfo ();
+ break;
+ default:
+ errs () << "error: bad path profiling file syntax, " << profType << "\n";
+ fclose (_file);
+ return false;
+ }
+ }
+
+ fclose (_file);
+
+ return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+ _functions.push_back(0); // make the 0 index a null pointer
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+ _functions.push_back(F);
+ }
+}
+
+// handle command-line argument info in the profile output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+ // get the argument list's length
+ unsigned savedArgsLength;
+ if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+ errs() << "warning: argument info header/data mismatch\n";
+ return;
+ }
+
+ // allocate a buffer, and get the arguments
+ char* args = new char[savedArgsLength+1];
+ if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+ errs() << "warning: argument info header/data mismatch\n";
+
+ args[savedArgsLength] = '\0';
+ argList = std::string(args);
+ delete [] args; // cleanup dynamic string
+
+  // skip padding so the file position stays 4-byte aligned
+ if (savedArgsLength & 3)
+ fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+ // get the number of functions in this profile
+ unsigned functionCount;
+ if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+ errs() << "warning: path info header/data mismatch\n";
+ return;
+ }
+
+ // gather path information for each function
+ for (unsigned i = 0; i < functionCount; i++) {
+ PathProfileHeader pathHeader;
+ if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+ errs() << "warning: bad header for path function info\n";
+ break;
+ }
+
+ Function* f = _functions[pathHeader.fnNumber];
+
+ // dynamically allocate a table to store path numbers
+ PathProfileTableEntry* pathTable =
+ new PathProfileTableEntry[pathHeader.numEntries];
+
+ if( fread(pathTable, sizeof(PathProfileTableEntry),
+ pathHeader.numEntries, _file) != pathHeader.numEntries) {
+ delete [] pathTable;
+ errs() << "warning: path function info header/data mismatch\n";
+ return;
+ }
+
+ // Build a new path for the current function
+ unsigned int totalPaths = 0;
+ for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+ totalPaths += pathTable[j].pathCounter;
+ _functionPaths[f][pathTable[j].pathNumber]
+ = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+ 0, this);
+ }
+
+ _functionPathCounts[f] = totalPaths;
+
+ delete [] pathTable;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+ struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+ static char ID; // Class identification, replacement for typeinfo
+ NoPathProfileInfo() : ImmutablePass(ID) {
+ initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &PathProfileInfo::ID)
+ return (PathProfileInfo*)this;
+ return this;
+ }
+
+ virtual const char *getPassName() const {
+ return "NoPathProfileInfo";
+ }
+ };
+} // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+ "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 0000000..c549773
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from the current path profile
+// information.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+ class PathProfileVerifier : public ModulePass {
+ private:
+ bool runOnModule(Module &M);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfileVerifier() : ModulePass(ID) {
+ initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+ }
+
+
+ virtual const char *getPassName() const {
+ return "Path Profiler Verifier";
+ }
+
+ // The verifier requires the path profile and edge profile.
+ virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+ };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+ cl::init("edgefrompath.llvmprof.out"),
+ cl::value_desc("filename"),
+ cl::desc("Edge profile file generated by -path-profile-verifier"),
+ cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+ "Compare the path profile derived edge profile against the "
+ "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+ return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<PathProfileInfo>();
+ AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+ PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+  // set up a data structure that maps path edges to indices into an
+  // array of edge counters
+ NestedBlockToIndexMap arrayMap;
+ unsigned i = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
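+    // Reserve a counter for the function-entry pseudo-edge; a null source
+    // block in the array map stands for "function entry".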
+ arrayMap[0][F->begin()][0] = i++;
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+
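+      // Consecutive successor slots that branch to the same block are
+      // distinguished by an increasing duplicate number.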
+ unsigned duplicate = 0;
+ BasicBlock* prev = 0;
+ for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+ prev = TI->getSuccessor(s), ++s) {
+ if (prev == TI->getSuccessor(s))
+ duplicate++;
+ else duplicate = 0;
+
+ arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+ }
+ }
+ }
+
+ std::vector<unsigned> edgeArray(i);
+
+ // iterate through each path and increment the edge counters as needed
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+
+ pathProfileInfo.setCurrentFunction(F);
+
+ DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+ << pathProfileInfo.pathsRun()
+ << "/" << pathProfileInfo.getPotentialPathCount()
+ << " potential paths\n");
+
+ for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+ endPath = pathProfileInfo.pathEnd();
+ nextPath != endPath; nextPath++ ) {
+ ProfilePath* currentPath = nextPath->second;
+
+ ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+ DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
+ << currentPath->getCount() << "\n");
+      // set up the entry edge (normally path profiling doesn't care about this)
+ if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
+ edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]]
+ += currentPath->getCount();
+
+ for( ProfilePathEdgeIterator nextEdge = pev->begin(),
+ endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
+ if (nextEdge != pev->begin())
+ DEBUG(dbgs() << " :: ");
+
+ BasicBlock* source = nextEdge->getSource();
+ BasicBlock* target = nextEdge->getTarget();
+ unsigned duplicateNumber = nextEdge->getDuplicateNumber();
+ DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
+ << "}--> " << target->getNameStr());
+
+ // Ensure all the referenced edges exist
+ // TODO: make this a separate function
+ if( !arrayMap.count(source) ) {
+ errs() << " error [" << F->getNameStr() << "()]: source '"
+ << source->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source].count(target) ) {
+ errs() << " error [" << F->getNameStr() << "()]: target '"
+ << target->getNameStr()
+ << "' does not exist in the array map.\n";
+ } else if( !arrayMap[source][target].count(duplicateNumber) ) {
+ errs() << " error [" << F->getNameStr() << "()]: edge "
+ << source->getNameStr() << " -> " << target->getNameStr()
+ << " duplicate number " << duplicateNumber
+ << " does not exist in the array map.\n";
+ } else {
+ edgeArray[arrayMap[source][target][duplicateNumber]]
+ += currentPath->getCount();
+ }
+ }
+
+ DEBUG(errs() << "\n");
+
+ delete pev;
+ }
+ }
+
+ std::string errorInfo;
+ std::string filename = EdgeProfileFilename;
+
+ // Open a handle to the file
+ FILE* edgeFile = fopen(filename.c_str(),"wb");
+
+ if (!edgeFile) {
+ errs() << "error: unable to open file '" << filename << "' for output.\n";
+ return false;
+ }
+
+ errs() << "Generating edge profile '" << filename << "' ...\n";
+
+ // write argument info
+ unsigned type = ArgumentInfo;
+ unsigned num = pathProfileInfo.argList.size();
+ int zeros = 0;
+
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+ fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
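+  // pad the argument string out to the next 4-byte boundary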
+ if (num&3)
+ fwrite(&zeros, 1, 4-(num&3), edgeFile);
+
+ type = EdgeInfo;
+ num = edgeArray.size();
+ fwrite(&type,sizeof(unsigned),1,edgeFile);
+ fwrite(&num,sizeof(unsigned),1,edgeFile);
+
+ // write each edge to the file
+ for( std::vector<unsigned>::iterator s = edgeArray.begin(),
+ e = edgeArray.end(); s != e; s++)
+ fwrite(&*s, sizeof (unsigned), 1, edgeFile);
+
+ fclose (edgeFile);
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Analysis/PointerTracking.cpp b/contrib/llvm/lib/Analysis/PointerTracking.cpp
deleted file mode 100644
index 07f4682..0000000
--- a/contrib/llvm/lib/Analysis/PointerTracking.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements tracking of pointer bounds.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/PointerTracking.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Value.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
-using namespace llvm;
-
-char PointerTracking::ID = 0;
-PointerTracking::PointerTracking() : FunctionPass(ID) {}
-
-bool PointerTracking::runOnFunction(Function &F) {
- predCache.clear();
- assert(analyzing.empty());
- FF = &F;
- TD = getAnalysisIfAvailable<TargetData>();
- SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
- return false;
-}
-
-void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominatorTree>();
- AU.addRequiredTransitive<LoopInfo>();
- AU.addRequiredTransitive<ScalarEvolution>();
- AU.setPreservesAll();
-}
-
-bool PointerTracking::doInitialization(Module &M) {
- const Type *PTy = Type::getInt8PtrTy(M.getContext());
-
- // Find calloc(i64, i64) or calloc(i32, i32).
- callocFunc = M.getFunction("calloc");
- if (callocFunc) {
- const FunctionType *Ty = callocFunc->getFunctionType();
-
- std::vector<const Type*> args, args2;
- args.push_back(Type::getInt64Ty(M.getContext()));
- args.push_back(Type::getInt64Ty(M.getContext()));
- args2.push_back(Type::getInt32Ty(M.getContext()));
- args2.push_back(Type::getInt32Ty(M.getContext()));
- const FunctionType *Calloc1Type =
- FunctionType::get(PTy, args, false);
- const FunctionType *Calloc2Type =
- FunctionType::get(PTy, args2, false);
- if (Ty != Calloc1Type && Ty != Calloc2Type)
- callocFunc = 0; // Give up
- }
-
- // Find realloc(i8*, i64) or realloc(i8*, i32).
- reallocFunc = M.getFunction("realloc");
- if (reallocFunc) {
- const FunctionType *Ty = reallocFunc->getFunctionType();
- std::vector<const Type*> args, args2;
- args.push_back(PTy);
- args.push_back(Type::getInt64Ty(M.getContext()));
- args2.push_back(PTy);
- args2.push_back(Type::getInt32Ty(M.getContext()));
-
- const FunctionType *Realloc1Type =
- FunctionType::get(PTy, args, false);
- const FunctionType *Realloc2Type =
- FunctionType::get(PTy, args2, false);
- if (Ty != Realloc1Type && Ty != Realloc2Type)
- reallocFunc = 0; // Give up
- }
- return false;
-}
-
-// Calculates the number of elements allocated for pointer P,
-// the type of the element is stored in Ty.
-const SCEV *PointerTracking::computeAllocationCount(Value *P,
- const Type *&Ty) const {
- Value *V = P->stripPointerCasts();
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- Value *arraySize = AI->getArraySize();
- Ty = AI->getAllocatedType();
- // arraySize elements of type Ty.
- return SE->getSCEV(arraySize);
- }
-
- if (CallInst *CI = extractMallocCall(V)) {
- Value *arraySize = getMallocArraySize(CI, TD);
- const Type* AllocTy = getMallocAllocatedType(CI);
- if (!AllocTy || !arraySize) return SE->getCouldNotCompute();
- Ty = AllocTy;
- // arraySize elements of type Ty.
- return SE->getSCEV(arraySize);
- }
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (GV->hasDefinitiveInitializer()) {
- Constant *C = GV->getInitializer();
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
- Ty = ATy->getElementType();
- return SE->getConstant(Type::getInt32Ty(P->getContext()),
- ATy->getNumElements());
- }
- }
- Ty = GV->getType();
- return SE->getConstant(Type::getInt32Ty(P->getContext()), 1);
- //TODO: implement more tracking for globals
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(V)) {
- CallSite CS(CI);
- Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- const Loop *L = LI->getLoopFor(CI->getParent());
- if (F == callocFunc) {
- Ty = Type::getInt8Ty(P->getContext());
- // calloc allocates arg0*arg1 bytes.
- return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
- SE->getSCEV(CS.getArgument(1))),
- L);
- } else if (F == reallocFunc) {
- Ty = Type::getInt8Ty(P->getContext());
- // realloc allocates arg1 bytes.
- return SE->getSCEVAtScope(CS.getArgument(1), L);
- }
- }
-
- return SE->getCouldNotCompute();
-}
-
-Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const
-{
- Value *V = P->stripPointerCasts();
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
- Ty = AI->getAllocatedType();
- // arraySize elements of type Ty.
- return AI->getArraySize();
- }
-
- if (CallInst *CI = extractMallocCall(V)) {
- Ty = getMallocAllocatedType(CI);
- if (!Ty)
- return 0;
- Value *arraySize = getMallocArraySize(CI, TD);
- if (!arraySize) {
- Ty = Type::getInt8Ty(P->getContext());
- return CI->getArgOperand(0);
- }
- // arraySize elements of type Ty.
- return arraySize;
- }
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (GV->hasDefinitiveInitializer()) {
- Constant *C = GV->getInitializer();
- if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
- Ty = ATy->getElementType();
- return ConstantInt::get(Type::getInt32Ty(P->getContext()),
- ATy->getNumElements());
- }
- }
- Ty = cast<PointerType>(GV->getType())->getElementType();
- return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1);
- //TODO: implement more tracking for globals
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(V)) {
- CallSite CS(CI);
- Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (F == reallocFunc) {
- Ty = Type::getInt8Ty(P->getContext());
- // realloc allocates arg1 bytes.
- return CS.getArgument(1);
- }
- }
-
- return 0;
-}
-
-// Calculates the number of elements of type Ty allocated for P.
-const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
- const Type *Ty)
- const {
- const Type *elementTy;
- const SCEV *Count = computeAllocationCount(P, elementTy);
- if (isa<SCEVCouldNotCompute>(Count))
- return Count;
- if (elementTy == Ty)
- return Count;
-
- if (!TD) // need TargetData from this point forward
- return SE->getCouldNotCompute();
-
- uint64_t elementSize = TD->getTypeAllocSize(elementTy);
- uint64_t wantSize = TD->getTypeAllocSize(Ty);
- if (elementSize == wantSize)
- return Count;
- if (elementSize % wantSize) //fractional counts not possible
- return SE->getCouldNotCompute();
- return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
- elementSize/wantSize));
-}
-
-const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
- // We only deal with pointers.
- const PointerType *PTy = cast<PointerType>(V->getType());
- return computeAllocationCountForType(V, PTy->getElementType());
-}
-
-const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
- return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
-}
-
-// Helper for isLoopGuardedBy that checks the swapped and inverted predicate too
-enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
- Predicate Pred,
- const SCEV *A,
- const SCEV *B) const {
- if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
- return AlwaysTrue;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
- return AlwaysTrue;
-
- Pred = ICmpInst::getInversePredicate(Pred);
- if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
- return AlwaysFalse;
- Pred = ICmpInst::getSwappedPredicate(Pred);
- if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
- return AlwaysTrue;
- return Unknown;
-}
-
-enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
- const SCEV *Limit,
- BasicBlock *BB)
-{
- //FIXME: merge implementation
- return Unknown;
-}
-
-void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
- const SCEV *&Limit,
- const SCEV *&Offset) const
-{
- Pointer = Pointer->stripPointerCasts();
- Base = Pointer->getUnderlyingObject();
- Limit = getAllocationSizeInBytes(Base);
- if (isa<SCEVCouldNotCompute>(Limit)) {
- Base = 0;
- Offset = Limit;
- return;
- }
-
- Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
- if (isa<SCEVCouldNotCompute>(Offset)) {
- Base = 0;
- Limit = Offset;
- }
-}
-
-void PointerTracking::print(raw_ostream &OS, const Module* M) const {
- // Calling some PT methods may cause caches to be updated, however
- // this should be safe for the same reason its safe for SCEV.
- PointerTracking &PT = *const_cast<PointerTracking*>(this);
- for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
- if (!I->getType()->isPointerTy())
- continue;
- Value *Base;
- const SCEV *Limit, *Offset;
- getPointerOffset(&*I, Base, Limit, Offset);
- if (!Base)
- continue;
-
- if (Base == &*I) {
- const SCEV *S = getAllocationElementCount(Base);
- OS << *Base << " ==> " << *S << " elements, ";
- OS << *Limit << " bytes allocated\n";
- continue;
- }
- OS << &*I << " -- base: " << *Base;
- OS << " offset: " << *Offset;
-
- enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
- switch (res) {
- case AlwaysTrue:
- OS << " always safe\n";
- break;
- case AlwaysFalse:
- OS << " always unsafe\n";
- break;
- case Unknown:
- OS << " <<unknown>>\n";
- break;
- }
- }
-}
-
-INITIALIZE_PASS(PointerTracking, "pointertracking",
- "Track pointer bounds", false, true);
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
index cbe8d18..3f0deab 100644
--- a/contrib/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Analysis/DominatorInternals.h"
using namespace llvm;
@@ -29,7 +30,7 @@ using namespace llvm;
char PostDominatorTree::ID = 0;
char PostDominanceFrontier::ID = 0;
INITIALIZE_PASS(PostDominatorTree, "postdomtree",
- "Post-Dominator Tree Construction", true, true);
+ "Post-Dominator Tree Construction", true, true)
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -53,8 +54,11 @@ FunctionPass* llvm::createPostDomTree() {
// PostDominanceFrontier Implementation
//===----------------------------------------------------------------------===//
-INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier",
- "Post-Dominance Frontier Construction", true, true);
+INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier",
+ "Post-Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier",
+ "Post-Dominance Frontier Construction", true, true)
const DominanceFrontier::DomSetType &
PostDominanceFrontier::calculate(const PostDominatorTree &DT,
diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
index ecc0a18..667ee1c 100644
--- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
@@ -39,7 +39,8 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(ID), ExecCount(execcount) {
+ : FunctionPass(ID), ExecCount(execcount) {
+ initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
if (execcount == 0) ExecCount = LoopWeight;
}
@@ -72,8 +73,11 @@ namespace {
} // End of anonymous namespace
char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false);
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false)
namespace llvm {
char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
@@ -319,6 +323,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
FunctionInformation.erase(&F);
BlockInformation[&F].clear();
EdgeInformation[&F].clear();
+ BBToVisit.clear();
// Mark all blocks as to visit.
for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
index fc7f286..36f211e 100644
--- a/contrib/llvm/lib/Analysis/ProfileInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -24,8 +24,12 @@
#include <limits>
using namespace llvm;
+namespace llvm {
+ template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
// Register the ProfileInfo interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
namespace llvm {
@@ -44,9 +48,6 @@ ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
}
template<>
-char ProfileInfoT<Function,BasicBlock>::ID = 0;
-
-template<>
char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
template<>
@@ -888,7 +889,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
FI = Unvisited.begin(), FE = Unvisited.end();
while(FI != FE && !FoundPath) {
const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest;
+ const BasicBlock *Dest = 0;
Path P;
bool BackEdgeFound = false;
for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
@@ -1076,7 +1077,9 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con
namespace {
struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(ID) {}
+ NoProfileInfo() : ImmutablePass(ID) {
+ initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
+ }
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -1097,6 +1100,6 @@ namespace {
char NoProfileInfo::ID = 0;
// Register this pass...
INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
- "No Profile Information", false, true, true);
+ "No Profile Information", false, true, true)
ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
index d325b57..098079b 100644
--- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -46,6 +46,7 @@ namespace {
static char ID; // Class identification, replacement for typeinfo
explicit LoaderPass(const std::string &filename = "")
: ModulePass(ID), Filename(filename) {
+ initializeLoaderPassPass(*PassRegistry::getPassRegistry());
if (filename.empty()) Filename = ProfileInfoFilename;
}
@@ -80,7 +81,7 @@ namespace {
char LoaderPass::ID = 0;
INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
- "Load profile information from llvmprof.out", false, true, false);
+ "Load profile information from llvmprof.out", false, true, false)
char &llvm::ProfileLoaderPassID = LoaderPass::ID;
diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
index 3f01b2d..a017518 100644
--- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
@@ -60,10 +60,12 @@ namespace llvm {
static char ID; // Class identification, replacement for typeinfo
explicit ProfileVerifierPassT () : FunctionPass(ID) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
DisableAssertions = ProfileVerifierDisableAssertions;
}
explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
DisableAssertions(da) {
+ initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -287,7 +289,7 @@ namespace llvm {
i != ie; ++i) {
if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
FType *F = CI->getCalledFunction();
- if (F && (F->getNameStr() == "_setjmp")) {
+ if (F && (F->getName() == "_setjmp")) {
isSetJmpTarget = true; break;
}
}
@@ -366,8 +368,11 @@ namespace llvm {
char ProfileVerifierPassT<FType, BType>::ID = 0;
}
-INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true);
+INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true)
namespace llvm {
FunctionPass *createProfileVerifierPass() {
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index abc057a..e2f6a8b 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -16,8 +16,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Assembly/Writer.h"
#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
@@ -45,7 +45,7 @@ STATISTIC(numSimpleRegions, "The # of simple regions");
/// PrintStyle - Print regions in different ways.
enum PrintStyle { PrintNone, PrintBB, PrintRN };
-cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
+static cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
cl::desc("style of printing regions"),
cl::values(
clEnumValN(PrintNone, "none", "print no details"),
@@ -72,6 +72,15 @@ Region::~Region() {
delete *I;
}
+void Region::replaceEntry(BasicBlock *BB) {
+ entry.setPointer(BB);
+}
+
+void Region::replaceExit(BasicBlock *BB) {
+ assert(exit && "No exit to replace!");
+ exit = BB;
+}
+
bool Region::contains(const BasicBlock *B) const {
BasicBlock *BB = const_cast<BasicBlock*>(B);
@@ -125,41 +134,49 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
return outermostLoopInRegion(L);
}
-bool Region::isSimple() const {
- bool isSimple = true;
- bool found = false;
-
- BasicBlock *entry = getEntry(), *exit = getExit();
-
- // TopLevelRegion
- if (!exit)
- return false;
+BasicBlock *Region::getEnteringBlock() const {
+ BasicBlock *entry = getEntry();
+ BasicBlock *Pred;
+ BasicBlock *enteringBlock = 0;
for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
++PI) {
- BasicBlock *Pred = *PI;
+ Pred = *PI;
if (DT->getNode(Pred) && !contains(Pred)) {
- if (found) {
- isSimple = false;
- break;
- }
- found = true;
+ if (enteringBlock)
+ return 0;
+
+ enteringBlock = Pred;
}
}
- found = false;
+ return enteringBlock;
+}
+
+BasicBlock *Region::getExitingBlock() const {
+ BasicBlock *exit = getExit();
+ BasicBlock *Pred;
+ BasicBlock *exitingBlock = 0;
+
+ if (!exit)
+ return 0;
for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
- ++PI)
- if (contains(*PI)) {
- if (found) {
- isSimple = false;
- break;
- }
- found = true;
+ ++PI) {
+ Pred = *PI;
+ if (contains(Pred)) {
+ if (exitingBlock)
+ return 0;
+
+ exitingBlock = Pred;
}
+ }
- return isSimple;
+ return exitingBlock;
+}
+
+bool Region::isSimple() const {
+ return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock();
}
std::string Region::getNameStr() const {
@@ -311,13 +328,38 @@ void Region::transferChildrenTo(Region *To) {
children.clear();
}
-void Region::addSubRegion(Region *SubRegion) {
+void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ assert(std::find(begin(), end(), SubRegion) == children.end()
+ && "Subregion already exists!");
+
SubRegion->parent = this;
- // Set up the region node.
- assert(std::find(children.begin(), children.end(), SubRegion) == children.end()
- && "Node already exist!");
children.push_back(SubRegion);
+
+ if (!moveChildren)
+ return;
+
+ assert(SubRegion->children.size() == 0
+ && "SubRegions that contain children are not supported");
+
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSubRegion()) {
+ BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+
+ if (SubRegion->contains(BB))
+ RI->setRegionFor(BB, SubRegion);
+ }
+
+ std::vector<Region*> Keep;
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ if (SubRegion->contains(*I) && *I != SubRegion) {
+ SubRegion->children.push_back(*I);
+ (*I)->parent = SubRegion;
+ } else
+ Keep.push_back(*I);
+
+ children.clear();
+ children.insert(children.begin(), Keep.begin(), Keep.end());
}
@@ -339,6 +381,38 @@ unsigned Region::getDepth() const {
return Depth;
}
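+// Try to build a larger region that keeps this region's entry but extends the
+// exit to cover the region beginning at the current exit; returns NULL when
+// the dominance requirements for a single entry, single exit region fail.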
+Region *Region::getExpandedRegion() const {
+ unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
+
+ if (NumSuccessors == 0)
+ return NULL;
+
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(getEntry(), *PI))
+ return NULL;
+
+ Region *R = RI->getRegionFor(exit);
+
+ if (R->getEntry() != exit) {
+ if (exit->getTerminator()->getNumSuccessors() == 1)
+ return new Region(getEntry(), *succ_begin(exit), RI, DT);
+ else
+ return NULL;
+ }
+
+ while (R->getParent() && R->getParent()->getEntry() == exit)
+ R = R->getParent();
+
+ if (!DT->dominates(getEntry(), R->getExit()))
+ for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
+ PI != PE; ++PI)
+ if (!DT->dominates(R->getExit(), *PI))
+ return NULL;
+
+ return new Region(getEntry(), R->getExit(), RI, DT);
+}
+
void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
if (print_tree)
OS.indent(level*2) << "[" << level << "] " << getNameStr();
@@ -376,6 +450,11 @@ void Region::dump() const {
}
void Region::clearNodeCache() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator I = BBNodeMap.begin(),
+ IE = BBNodeMap.end(); I != IE; ++I)
+ delete I->second;
+
BBNodeMap.clear();
for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
(*RI)->clearNodeCache();
@@ -592,6 +671,7 @@ void RegionInfo::releaseMemory() {
}
RegionInfo::RegionInfo() : FunctionPass(ID) {
+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
TopLevelRegion = 0;
}
@@ -654,11 +734,14 @@ Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
return I != BBtoRegion.end() ? I->second : 0;
}
+void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
+ BBtoRegion[BB] = R;
+}
+
Region *RegionInfo::operator[](BasicBlock *BB) const {
return getRegionFor(BB);
}
-
BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
BasicBlock *Exit = NULL;
@@ -733,9 +816,28 @@ RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
return ret;
}
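+// Update the region tree after OldBB has been split: NewBB takes OldBB's place
+// as the entry of every region that previously began at OldBB, and OldBB is
+// moved to the innermost region that did not start at it.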
+void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
+{
+ Region *R = getRegionFor(OldBB);
+
+ setRegionFor(NewBB, R);
+
+ while (R->getEntry() == OldBB && !R->isTopLevelRegion()) {
+ R->replaceEntry(NewBB);
+ R = R->getParent();
+ }
+
+ setRegionFor(OldBB, R);
+}
+
char RegionInfo::ID = 0;
-INITIALIZE_PASS(RegionInfo, "regions",
- "Detect single entry single exit regions", true, true);
+INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
+INITIALIZE_PASS_END(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
new file mode 100644
index 0000000..3269dcc
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -0,0 +1,275 @@
+//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements RegionPass and RGPassManager. All region optimization
+// and transformation passes are derived from RegionPass. RGPassManager is
+// responsible for managing RegionPasses.
+// Most of this code was copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+ : FunctionPass(ID), PMDataManager(Depth) {
+ skipThisRegion = false;
+ redoThisRegion = false;
+ RI = NULL;
+ CurrentRegion = NULL;
+}
+
+// Recursively add the region R and all of its subregions to RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
+ RQ.push_back(R);
+ for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+ addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+ Info.addRequired<RegionInfo>();
+ Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+ RI = &getAnalysis<RegionInfo>();
+ bool Changed = false;
+
+ // Collect inherited analysis from Module level pass manager.
+ populateInheritedAnalysis(TPM->activeStack);
+
+ addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+ if (RQ.empty()) // No regions, skip calling finalizers
+ return false;
+
+ // Initialization
+ for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
+ I != E; ++I) {
+ Region *R = *I;
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *RP = (RegionPass *)getContainedPass(Index);
+ Changed |= RP->doInitialization(R, *this);
+ }
+ }
+
+ // Walk Regions
+ while (!RQ.empty()) {
+
+ CurrentRegion = RQ.back();
+ skipThisRegion = false;
+ redoThisRegion = false;
+
+ // Run all passes on the current Region.
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+
+ initializeAnalysisImpl(P);
+
+ {
+ PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+ TimeRegion PassTimer(getPassTimer(P));
+ Changed |= P->runOnRegion(CurrentRegion, *this);
+ }
+
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+
+ if (!skipThisRegion) {
+ // Manually check that this region is still healthy. This is done
+ // instead of relying on RegionInfo::verifyRegion since RegionInfo
+ // is a function pass and it's really expensive to verify every
+ // Region in the function every time. That level of checking can be
+ // enabled with the -verify-region-info option.
+ {
+ TimeRegion PassTimer(getPassTimer(P));
+ CurrentRegion->verifyRegion();
+ }
+
+ // Then call the regular verifyAnalysis functions.
+ verifyPreservedAnalysis(P);
+ }
+
+ removeNotPreservedAnalysis(P);
+ recordAvailableAnalysis(P);
+ removeDeadPasses(P,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr(),
+ ON_REGION_MSG);
+
+ if (skipThisRegion)
+ // Do not run other passes on this region.
+ break;
+ }
+
+ // If the region was deleted, release all the region passes. This frees up
+ // some memory, and avoids trouble with the pass manager trying to call
+ // verifyAnalysis on them.
+ if (skipThisRegion)
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ freePass(P, "<deleted>", ON_REGION_MSG);
+ }
+
+ // Pop the region from queue after running all passes.
+ RQ.pop_back();
+
+ if (redoThisRegion)
+ RQ.push_back(CurrentRegion);
+
+ // Free all region nodes created in region passes.
+ RI->clearNodeCache();
+ }
+
+ // Finalization
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ RegionPass *P = (RegionPass*)getContainedPass(Index);
+ Changed |= P->doFinalization();
+ }
+
+  // Print the region tree after all passes have run.
+ DEBUG(
+ dbgs() << "\nRegion tree of function " << F.getName()
+ << " after all region Pass:\n";
+ RI->dump();
+ dbgs() << "\n";
+ );
+
+ return Changed;
+}
+
+/// Print passes managed by this manager
+void RGPassManager::dumpPassStructure(unsigned Offset) {
+ errs().indent(Offset*2) << "Region Pass Manager\n";
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ Pass *P = getContainedPass(Index);
+ P->dumpPassStructure(Offset + 1);
+ dumpLastUses(P, Offset+1);
+ }
+}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// PrintRegionPass
+class PrintRegionPass : public RegionPass {
+private:
+ std::string Banner;
+ raw_ostream &Out; // raw_ostream to print on.
+
+public:
+ static char ID;
+ PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
+ PrintRegionPass(const std::string &B, raw_ostream &o)
+ : RegionPass(ID), Banner(B), Out(o) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+ Out << Banner;
+ for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+ I != E; ++I)
+ (*I)->getEntry()->print(Out);
+
+ return false;
+ }
+};
+
+char PrintRegionPass::ID = 0;
+} //end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// RegionPass
+
+// Check if this pass is suitable for the current RGPassManager, if
+// available. This pass P is not suitable for a RGPassManager if P
+// does not preserve higher level analysis info used by other
+// RGPassManager passes. In such a case, pop the RGPassManager from the
+// stack. This will force assignPassManager() to create a new
+// RGPassManager as expected.
+void RegionPass::preparePassManager(PMStack &PMS) {
+
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by the current RGPassManager,
+  // then do not insert this pass into it. Use a new RGPassManager.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager &&
+ !PMS.top()->preserveHigherLevelAnalysis(this))
+ PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void RegionPass::assignPassManager(PMStack &PMS,
+ PassManagerType PreferredType) {
+ // Find RGPassManager
+ while (!PMS.empty() &&
+ PMS.top()->getPassManagerType() > PMT_RegionPassManager)
+ PMS.pop();
+
+ RGPassManager *RGPM;
+
+ // Create new Region Pass Manager if it does not exist.
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager)
+ RGPM = (RGPassManager*)PMS.top();
+ else {
+
+ assert (!PMS.empty() && "Unable to create Region Pass Manager");
+ PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Region Pass Manager
+ RGPM = new RGPassManager(PMD->getDepth() + 1);
+ RGPM->populateInheritedAnalysis(PMS);
+
+ // [2] Set up new manager's top level manager
+ PMTopLevelManager *TPM = PMD->getTopLevelManager();
+ TPM->addIndirectPassManager(RGPM);
+
+ // [3] Assign manager to manage this new manager. This may create
+ // and push new managers into PMS
+ TPM->schedulePass(RGPM);
+
+ // [4] Push new manager into PMS
+ PMS.push(RGPM);
+ }
+
+ RGPM->add(this);
+}
+
+/// Get the printer pass
+Pass *RegionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return new PrintRegionPass(Banner, O);
+}
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index fee5c1b..0cf0f90 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -121,35 +121,41 @@ namespace {
struct RegionViewer
: public DOTGraphTraitsViewer<RegionInfo, false> {
static char ID;
- RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){}
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){
+ initializeRegionViewerPass(*PassRegistry::getPassRegistry());
+ }
};
-
char RegionViewer::ID = 0;
-INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
- true, true);
struct RegionOnlyViewer
: public DOTGraphTraitsViewer<RegionInfo, true> {
static char ID;
- RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){}
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) {
+ initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());
+ }
};
-
char RegionOnlyViewer::ID = 0;
-INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
- "View regions of function (with no function bodies)",
- true, true);
struct RegionPrinter
: public DOTGraphTraitsPrinter<RegionInfo, false> {
static char ID;
RegionPrinter() :
- DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {}
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {
+ initializeRegionPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
+char RegionPrinter::ID = 0;
} //end anonymous namespace
-char RegionPrinter::ID = 0;
INITIALIZE_PASS(RegionPrinter, "dot-regions",
- "Print regions of function to 'dot' file", true, true);
+ "Print regions of function to 'dot' file", true, true)
+
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true)
+
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true)
namespace {
@@ -157,7 +163,9 @@ struct RegionOnlyPrinter
: public DOTGraphTraitsPrinter<RegionInfo, true> {
static char ID;
RegionOnlyPrinter() :
- DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {}
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {
+ initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ }
};
}
@@ -166,7 +174,7 @@ char RegionOnlyPrinter::ID = 0;
INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
"Print regions of function to 'dot' file "
"(with no function bodies)",
- true, true);
+ true, true)
FunctionPass* llvm::createRegionViewerPass() {
return new RegionViewer();
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index b892d85..62244cc 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -69,6 +69,7 @@
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
@@ -103,8 +104,12 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
"derived loop"),
cl::init(100));
-INITIALIZE_PASS(ScalarEvolution, "scalar-evolution",
- "Scalar Evolution Analysis", false, true);
+INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
//===----------------------------------------------------------------------===//
@@ -115,13 +120,139 @@ char ScalarEvolution::ID = 0;
// Implementation of the SCEV class.
//
-SCEV::~SCEV() {}
-
void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
+void SCEV::print(raw_ostream &OS) const {
+ switch (getSCEVType()) {
+ case scConstant:
+ WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ return;
+ case scTruncate: {
+ const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
+ const SCEV *Op = Trunc->getOperand();
+ OS << "(trunc " << *Op->getType() << " " << *Op << " to "
+ << *Trunc->getType() << ")";
+ return;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this);
+ const SCEV *Op = ZExt->getOperand();
+ OS << "(zext " << *Op->getType() << " " << *Op << " to "
+ << *ZExt->getType() << ")";
+ return;
+ }
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this);
+ const SCEV *Op = SExt->getOperand();
+ OS << "(sext " << *Op->getType() << " " << *Op << " to "
+ << *SExt->getType() << ")";
+ return;
+ }
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this);
+ OS << "{" << *AR->getOperand(0);
+ for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i)
+ OS << ",+," << *AR->getOperand(i);
+ OS << "}<";
+ if (AR->hasNoUnsignedWrap())
+ OS << "nuw><";
+ if (AR->hasNoSignedWrap())
+ OS << "nsw><";
+ WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ OS << ">";
+ return;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+ const char *OpStr = 0;
+ switch (NAry->getSCEVType()) {
+ case scAddExpr: OpStr = " + "; break;
+ case scMulExpr: OpStr = " * "; break;
+ case scUMaxExpr: OpStr = " umax "; break;
+ case scSMaxExpr: OpStr = " smax "; break;
+ }
+ OS << "(";
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ OS << **I;
+ if (llvm::next(I) != E)
+ OS << OpStr;
+ }
+ OS << ")";
+ return;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this);
+ OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
+ return;
+ }
+ case scUnknown: {
+ const SCEVUnknown *U = cast<SCEVUnknown>(this);
+ const Type *AllocTy;
+ if (U->isSizeOf(AllocTy)) {
+ OS << "sizeof(" << *AllocTy << ")";
+ return;
+ }
+ if (U->isAlignOf(AllocTy)) {
+ OS << "alignof(" << *AllocTy << ")";
+ return;
+ }
+
+ const Type *CTy;
+ Constant *FieldNo;
+ if (U->isOffsetOf(CTy, FieldNo)) {
+ OS << "offsetof(" << *CTy << ", ";
+ WriteAsOperand(OS, FieldNo, false);
+ OS << ")";
+ return;
+ }
+
+ // Otherwise just print it normally.
+ WriteAsOperand(OS, U->getValue(), false);
+ return;
+ }
+ case scCouldNotCompute:
+ OS << "***COULDNOTCOMPUTE***";
+ return;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
+const Type *SCEV::getType() const {
+ switch (getSCEVType()) {
+ case scConstant:
+ return cast<SCEVConstant>(this)->getType();
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return cast<SCEVCastExpr>(this)->getType();
+ case scAddRecExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ return cast<SCEVNAryExpr>(this)->getType();
+ case scAddExpr:
+ return cast<SCEVAddExpr>(this)->getType();
+ case scUDivExpr:
+ return cast<SCEVUDivExpr>(this)->getType();
+ case scUnknown:
+ return cast<SCEVUnknown>(this)->getType();
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return 0;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return 0;
+}
+
bool SCEV::isZero() const {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
return SC->getValue()->isZero();
@@ -143,30 +274,6 @@ bool SCEV::isAllOnesValue() const {
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
-bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
-}
-
-const Type *SCEVCouldNotCompute::getType() const {
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return 0;
-}
-
-bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
-}
-
-bool SCEVCouldNotCompute::hasOperand(const SCEV *) const {
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
-}
-
-void SCEVCouldNotCompute::print(raw_ostream &OS) const {
- OS << "***COULDNOTCOMPUTE***";
-}
-
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
}
@@ -192,24 +299,10 @@ ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
-const Type *SCEVConstant::getType() const { return V->getType(); }
-
-void SCEVConstant::print(raw_ostream &OS) const {
- WriteAsOperand(OS, V, false);
-}
-
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
unsigned SCEVTy, const SCEV *op, const Type *ty)
: SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
-bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- return Op->dominates(BB, DT);
-}
-
-bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- return Op->properlyDominates(BB, DT);
-}
-
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
@@ -218,10 +311,6 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
"Cannot truncate non-integer value!");
}
-void SCEVTruncateExpr::print(raw_ostream &OS) const {
- OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
@@ -230,10 +319,6 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
"Cannot zero extend non-integer value!");
}
-void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
- OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
@@ -242,139 +327,9 @@ SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
"Cannot sign extend non-integer value!");
}
-void SCEVSignExtendExpr::print(raw_ostream &OS) const {
- OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
-}
-
-void SCEVCommutativeExpr::print(raw_ostream &OS) const {
- const char *OpStr = getOperationStr();
- OS << "(";
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
- OS << **I;
- if (llvm::next(I) != E)
- OS << OpStr;
- }
- OS << ")";
-}
-
-bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- if (!(*I)->dominates(BB, DT))
- return false;
- return true;
-}
-
-bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- if (!(*I)->properlyDominates(BB, DT))
- return false;
- return true;
-}
-
-bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- if (!(*I)->isLoopInvariant(L))
- return false;
- return true;
-}
-
-// hasComputableLoopEvolution - N-ary expressions have computable loop
-// evolutions iff they have at least one operand that varies with the loop,
-// but that all varying operands are computable.
-bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
- bool HasVarying = false;
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
- const SCEV *S = *I;
- if (!S->isLoopInvariant(L)) {
- if (S->hasComputableLoopEvolution(L))
- HasVarying = true;
- else
- return false;
- }
- }
- return HasVarying;
-}
-
-bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
- const SCEV *S = *I;
- if (O == S || S->hasOperand(O))
- return true;
- }
- return false;
-}
-
-bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
-}
-
-bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT);
-}
-
-void SCEVUDivExpr::print(raw_ostream &OS) const {
- OS << "(" << *LHS << " /u " << *RHS << ")";
-}
-
-const Type *SCEVUDivExpr::getType() const {
- // In most cases the types of LHS and RHS will be the same, but in some
- // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
- // depend on the type for correctness, but handling types carefully can
- // avoid extra casts in the SCEVExpander. The LHS is more likely to be
- // a pointer type than the RHS, so use the RHS' type here.
- return RHS->getType();
-}
-
-bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
- // Add recurrences are never invariant in the function-body (null loop).
- if (!QueryLoop)
- return false;
-
- // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L.
- if (QueryLoop->contains(L))
- return false;
-
- // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
- if (L->contains(QueryLoop))
- return true;
-
- // This recurrence is variant w.r.t. QueryLoop if any of its operands
- // are variant.
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- if (!(*I)->isLoopInvariant(QueryLoop))
- return false;
-
- // Otherwise it's loop-invariant.
- return true;
-}
-
-bool
-SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- return DT->dominates(L->getHeader(), BB) &&
- SCEVNAryExpr::dominates(BB, DT);
-}
-
-bool
-SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- // This uses a "dominates" query instead of "properly dominates" query because
- // the instruction which produces the addrec's value is a PHI, and a PHI
- // effectively properly dominates its entire containing block.
- return DT->dominates(L->getHeader(), BB) &&
- SCEVNAryExpr::properlyDominates(BB, DT);
-}
-
-void SCEVAddRecExpr::print(raw_ostream &OS) const {
- OS << "{" << *Operands[0];
- for (unsigned i = 1, e = NumOperands; i != e; ++i)
- OS << ",+," << *Operands[i];
- OS << "}<";
- WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
- OS << ">";
-}
-
void SCEVUnknown::deleted() {
- // Clear this SCEVUnknown from ValuesAtScopes.
- SE->ValuesAtScopes.erase(this);
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
@@ -384,8 +339,8 @@ void SCEVUnknown::deleted() {
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
- // Clear this SCEVUnknown from ValuesAtScopes.
- SE->ValuesAtScopes.erase(this);
+ // Clear this SCEVUnknown from various maps.
+ SE->forgetMemoizedResults(this);
// Remove this SCEVUnknown from the uniquing map.
SE->UniqueSCEVs.RemoveNode(this);
@@ -396,32 +351,6 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) {
setValPtr(New);
}
-bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
- // All non-instruction values are loop invariant. All instructions are loop
- // invariant if they are not contained in the specified loop.
- // Instructions are never considered invariant in the function body
- // (null loop) because they are defined within the "loop".
- if (Instruction *I = dyn_cast<Instruction>(getValue()))
- return L && !L->contains(I);
- return true;
-}
-
-bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const {
- if (Instruction *I = dyn_cast<Instruction>(getValue()))
- return DT->dominates(I->getParent(), BB);
- return true;
-}
-
-bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- if (Instruction *I = dyn_cast<Instruction>(getValue()))
- return DT->properlyDominates(I->getParent(), BB);
- return true;
-}
-
-const Type *SCEVUnknown::getType() const {
- return getValue()->getType();
-}
-
bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
@@ -486,30 +415,6 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
return false;
}
-void SCEVUnknown::print(raw_ostream &OS) const {
- const Type *AllocTy;
- if (isSizeOf(AllocTy)) {
- OS << "sizeof(" << *AllocTy << ")";
- return;
- }
- if (isAlignOf(AllocTy)) {
- OS << "alignof(" << *AllocTy << ")";
- return;
- }
-
- const Type *CTy;
- Constant *FieldNo;
- if (isOffsetOf(CTy, FieldNo)) {
- OS << "offsetof(" << *CTy << ", ";
- WriteAsOperand(OS, FieldNo, false);
- OS << ")";
- return;
- }
-
- // Otherwise just print it normally.
- WriteAsOperand(OS, getValue(), false);
-}
-
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
@@ -914,6 +819,36 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+ // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getAddExpr(Operands, false, false);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
+ // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
+ // eliminate all the truncates.
+ if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
+ SmallVector<const SCEV *, 4> Operands;
+ bool hasTrunc = false;
+ for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
+ const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
+ hasTrunc = isa<SCEVTruncateExpr>(S);
+ Operands.push_back(S);
+ }
+ if (!hasTrunc)
+ return getMulExpr(Operands, false, false);
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
+ }
+
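// Illustrative example (not part of the patch): if A and B are i64 SCEVs that
// are themselves zero-extensions of i32 values y1 and y2, then
//   getTruncateExpr(getAddExpr(A, B), i32)
// now folds to "y1 + y2" rather than "(trunc i64 (A + B) to i32)", because
// truncating each operand eliminates its extension and no SCEVTruncateExpr
// remains. The multiply fold above behaves the same way.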
// If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
@@ -965,6 +900,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // zext(trunc(x)) --> zext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all zero bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getUnsignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
+ CR.zextOrTrunc(NewBits)))
+ return getTruncateOrZeroExtend(X, Ty);
+ }
+
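// Worked example (illustrative, not from the patch): suppose X is an i32 SCEV
// whose unsigned range is known to be [0, 200). Then for
//   getZeroExtendExpr(getTruncateExpr(X, i8), i64)
// truncating that range to i8 and zero-extending it back still covers the
// range of X, so the expression simplifies to "(zext i32 X to i64)" rather
// than "(zext i8 (trunc i32 X to i8) to i64)".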
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can zero extend all of the
// operands (often constants). This allows analysis of something like
@@ -1089,6 +1037,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
return getSignExtendExpr(SS->getOperand(), Ty);
+ // sext(zext(x)) --> zext(x)
+ if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
+ return getZeroExtendExpr(SZ->getOperand(), Ty);
+
// Before doing any expensive analysis, check to see if we've already
// computed a SCEV for this Op and Ty.
FoldingSetNodeID ID;
@@ -1098,6 +1050,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
void *IP = 0;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // If the input value is provably non-negative, build a zext instead.
+ if (isKnownNonNegative(Op))
+ return getZeroExtendExpr(Op, Ty);
+
+ // sext(trunc(x)) --> sext(x) or x or trunc(x)
+ if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) {
+ // It's possible the bits taken off by the truncate were all sign bits. If
+ // so, we should be able to simplify this further.
+ const SCEV *X = ST->getOperand();
+ ConstantRange CR = getSignedRange(X);
+ unsigned TruncBits = getTypeSizeInBits(ST->getType());
+ unsigned NewBits = getTypeSizeInBits(Ty);
+ if (CR.truncate(TruncBits).signExtend(NewBits).contains(
+ CR.sextOrTrunc(NewBits)))
+ return getTruncateOrSignExtend(X, Ty);
+ }
+
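// Illustrative examples (not from the patch): if isKnownNonNegative proves the
// operand cannot be negative (for instance, an unsigned constant), the whole
// sign extension is built as a zero extension instead. Likewise, if X is an
// i32 SCEV whose signed range fits in i8, then sext(trunc X to i8) to i64
// simplifies to a plain extension of X, mirroring the unsigned case above.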
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
@@ -1639,7 +1608,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i]->isLoopInvariant(AddRecLoop)) {
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -1711,7 +1680,6 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
- ID.AddInteger(Ops.size());
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
@@ -1846,7 +1814,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i]->isLoopInvariant(AddRecLoop)) {
+ if (isLoopInvariant(Ops[i], AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -1917,7 +1885,6 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
- ID.AddInteger(Ops.size());
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
@@ -2066,6 +2033,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+ assert(isLoopInvariant(Operands[i], L) &&
+ "SCEVAddRecExpr operand is not loop-invariant!");
#endif
if (Operands.back()->isZero()) {
@@ -2106,7 +2076,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// requirement.
bool AllInvariant = true;
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- if (!Operands[i]->isLoopInvariant(L)) {
+ if (!isLoopInvariant(Operands[i], L)) {
AllInvariant = false;
break;
}
@@ -2114,7 +2084,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
NestedOperands[0] = getAddRecExpr(Operands, L);
AllInvariant = true;
for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
- if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
+ if (!isLoopInvariant(NestedOperands[i], NestedLoop)) {
AllInvariant = false;
break;
}
@@ -2131,7 +2101,6 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
- ID.AddInteger(Operands.size());
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
ID.AddPointer(Operands[i]);
ID.AddPointer(L);
@@ -2242,7 +2211,6 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scSMaxExpr);
- ID.AddInteger(Ops.size());
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
@@ -2347,7 +2315,6 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// already have one, otherwise create a new one.
FoldingSetNodeID ID;
ID.AddInteger(scUMaxExpr);
- ID.AddInteger(Ops.size());
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = 0;
@@ -2543,24 +2510,24 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
return getMinusSCEV(AllOnes, V);
}
-/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
-///
-const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
- const SCEV *RHS) {
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1,
+/// and thus the HasNUW and HasNSW bits apply to the resultant add, not
+/// whether the sub would have overflowed.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ bool HasNUW, bool HasNSW) {
// Fast path: X - X --> 0.
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
// X - Y --> X + -Y
- return getAddExpr(LHS, getNegativeSCEV(RHS));
+ return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW);
}
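A brief usage sketch of the widened signature (the variable names are hypothetical): a caller that already knows the subtraction cannot wrap, as the exit-test rewrite later in this patch does, can forward that fact to the underlying add.

// End - Start is known not to wrap unsigned in this caller's context, so the
// flag is attached to the resulting "End + (-1 * Start)" add expression.
const SCEV *Dist = SE.getMinusSCEV(End, Start, /*HasNUW=*/true);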
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
- const Type *Ty) {
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -2714,9 +2681,11 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
ValueExprMapType::iterator It =
ValueExprMap.find(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
- if (It->second != SymName && !It->second->hasOperand(SymName))
+ if (Old != SymName && !hasOperand(Old, SymName))
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
@@ -2727,9 +2696,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
// updates on its own when it gets to that point. In the third, we do
// want to forget the SCEVUnknown.
if (!isa<PHINode>(I) ||
- !isa<SCEVUnknown>(It->second) ||
- (I != PN && It->second == SymName)) {
- ValuesAtScopes.erase(It->second);
+ !isa<SCEVUnknown>(Old) ||
+ (I != PN && Old == SymName)) {
+ forgetMemoizedResults(Old);
ValueExprMap.erase(It);
}
}
@@ -2801,7 +2770,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// This is not a valid addrec if the step amount is varying each
// loop iteration, but is not itself an addrec in this loop.
- if (Accum->isLoopInvariant(L) ||
+ if (isLoopInvariant(Accum, L) ||
(isa<SCEVAddRecExpr>(Accum) &&
cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
bool HasNUW = false;
@@ -2814,6 +2783,23 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
HasNUW = true;
if (OBO->hasNoSignedWrap())
HasNSW = true;
+ } else if (const GEPOperator *GEP =
+ dyn_cast<GEPOperator>(BEValueV)) {
+ // If the increment is a GEP, then we know it won't overflow in a signed
+ // sense, because the address space cannot wrap around.
+ //
+ // NOTE: This isn't strictly true, because you could have an
+ // object straddling the 2G address boundary in a 32-bit address
+ // space (for example). We really want to model this as a "has
+ // no signed/unsigned wrap" where the base pointer is treated as
+ // unsigned and the increment is known to not have signed
+ // wrapping.
+ //
+ // This is a highly theoretical concern though, and this is good
+ // enough for all cases we know of at this point. :)
+ //
+ HasNSW |= GEP->isInBounds();
}
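// Illustrative example (not part of the original change): for a pointer
// induction variable such as
//   for (int *p = a; p != e; ++p) ...
// the back-edge value is a GEP like "getelementptr inbounds i32* %p, i64 1",
// so the recurrence built for %p carries the NSW flag, e.g.
// {%a,+,4}<nsw><%loop> on a target where i32 occupies 4 bytes.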
const SCEV *StartVal = getSCEV(StartValueV);
@@ -2822,7 +2808,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// Since the no-wrap flags are on the increment, they apply to the
// post-incremented value as well.
- if (Accum->isLoopInvariant(L))
+ if (isLoopInvariant(Accum, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum),
Accum, L, HasNUW, HasNSW);
@@ -2867,17 +2853,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = PN->hasConstantValue(DT)) {
- bool AllSameLoop = true;
- Loop *PNLoop = LI->getLoopFor(PN->getParent());
- for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
- AllSameLoop = false;
- break;
- }
- if (AllSameLoop)
+ if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
- }
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
@@ -2892,6 +2870,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
+ bool isInBounds = GEP->isInBounds();
const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
@@ -2920,7 +2899,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false,
+ /*NSW*/ isInBounds);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -2931,7 +2911,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset);
+ return getAddExpr(BaseS, TotalOffset, /*NUW*/ false,
+ /*NSW*/ isInBounds);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3019,9 +3000,13 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
///
ConstantRange
ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+ if (I != UnsignedRanges.end())
+ return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return ConstantRange(C->getValue()->getValue());
+ return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3038,49 +3023,52 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
ConstantRange X = getUnsignedRange(Add->getOperand(0));
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getUnsignedRange(Add->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getUnsignedRange(Mul->getOperand(0));
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getUnsignedRange(SMax->getOperand(0));
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getUnsignedRange(SMax->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getUnsignedRange(UMax->getOperand(0));
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getUnsignedRange(UMax->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getUnsignedRange(UDiv->getLHS());
ConstantRange Y = getUnsignedRange(UDiv->getRHS());
- return ConservativeResult.intersectWith(X.udiv(Y));
+ return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getUnsignedRange(ZExt->getOperand());
- return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
+ return setUnsignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getUnsignedRange(SExt->getOperand());
- return ConservativeResult.intersectWith(X.signExtend(BitWidth));
+ return setUnsignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getUnsignedRange(Trunc->getOperand());
- return ConservativeResult.intersectWith(X.truncate(BitWidth));
+ return setUnsignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3120,19 +3108,20 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
ExtEndRange)
- return ConservativeResult;
+ return setUnsignedRange(AddRec, ConservativeResult);
APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
EndRange.getUnsignedMin());
APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
EndRange.getUnsignedMax());
if (Min.isMinValue() && Max.isMaxValue())
- return ConservativeResult;
- return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
+ return setUnsignedRange(AddRec, ConservativeResult);
+ return setUnsignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
}
}
- return ConservativeResult;
+ return setUnsignedRange(AddRec, ConservativeResult);
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -3141,20 +3130,25 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
if (Ones == ~Zeros + 1)
- return ConservativeResult;
- return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ return setUnsignedRange(U, ConservativeResult);
+ return setUnsignedRange(U,
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
}
- return ConservativeResult;
+ return setUnsignedRange(S, ConservativeResult);
}
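The memoization is transparent to callers; a repeated query is now a map lookup in UnsignedRanges (and, below, SignedRanges). A minimal usage sketch, with S standing for any SCEV of interest:

ConstantRange UR = SE.getUnsignedRange(S);   // computed once, then cached
ConstantRange SR = SE.getSignedRange(S);     // analogous signed-range cache
if (!UR.isFullSet())
  errs() << "unsigned range of S: " << UR << "\n";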
/// getSignedRange - Determine the signed range for a particular SCEV.
///
ConstantRange
ScalarEvolution::getSignedRange(const SCEV *S) {
+ // See if we've computed this range already.
+ DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+ if (I != SignedRanges.end())
+ return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return ConstantRange(C->getValue()->getValue());
+ return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
@@ -3171,49 +3165,52 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
ConstantRange X = getSignedRange(Add->getOperand(0));
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getSignedRange(Add->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setSignedRange(Add, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getSignedRange(Mul->getOperand(0));
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getSignedRange(Mul->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setSignedRange(Mul, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getSignedRange(SMax->getOperand(0));
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getSignedRange(SMax->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setSignedRange(SMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getSignedRange(UMax->getOperand(0));
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getSignedRange(UMax->getOperand(i)));
- return ConservativeResult.intersectWith(X);
+ return setSignedRange(UMax, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getSignedRange(UDiv->getLHS());
ConstantRange Y = getSignedRange(UDiv->getRHS());
- return ConservativeResult.intersectWith(X.udiv(Y));
+ return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getSignedRange(ZExt->getOperand());
- return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
+ return setSignedRange(ZExt,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getSignedRange(SExt->getOperand());
- return ConservativeResult.intersectWith(X.signExtend(BitWidth));
+ return setSignedRange(SExt,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getSignedRange(Trunc->getOperand());
- return ConservativeResult.intersectWith(X.truncate(BitWidth));
+ return setSignedRange(Trunc,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3263,34 +3260,35 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
ExtEndRange)
- return ConservativeResult;
+ return setSignedRange(AddRec, ConservativeResult);
APInt Min = APIntOps::smin(StartRange.getSignedMin(),
EndRange.getSignedMin());
APInt Max = APIntOps::smax(StartRange.getSignedMax(),
EndRange.getSignedMax());
if (Min.isMinSignedValue() && Max.isMaxSignedValue())
- return ConservativeResult;
- return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
+ return setSignedRange(AddRec, ConservativeResult);
+ return setSignedRange(AddRec,
+ ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
}
}
- return ConservativeResult;
+ return setSignedRange(AddRec, ConservativeResult);
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
if (!U->getValue()->getType()->isIntegerTy() && !TD)
- return ConservativeResult;
+ return setSignedRange(U, ConservativeResult);
unsigned NS = ComputeNumSignBits(U->getValue(), TD);
if (NS == 1)
- return ConservativeResult;
- return ConservativeResult.intersectWith(
+ return setSignedRange(U, ConservativeResult);
+ return setSignedRange(U, ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1));
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
}
- return ConservativeResult;
+ return setSignedRange(S, ConservativeResult);
}
/// createSCEV - We know that there is no SCEV for the specified value.
@@ -3458,8 +3456,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// If C is a single bit, it may be in the sign-bit position
// before the zero-extend. In this case, represent the xor
// using an add, which is equivalent, and re-apply the zext.
- APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
- if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ APInt Trunc = CI->getValue().trunc(Z0TySize);
+ if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
Trunc.isSignBit())
return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
UTy);
@@ -3699,58 +3697,61 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// backedge-taken count, which could result in infinite recursion.
std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
- if (Pair.second) {
- BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
- if (BECount.Exact != getCouldNotCompute()) {
- assert(BECount.Exact->isLoopInvariant(L) &&
- BECount.Max->isLoopInvariant(L) &&
- "Computed backedge-taken count isn't loop invariant for loop!");
- ++NumTripCountsComputed;
+ if (!Pair.second)
+ return Pair.first->second;
+ BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
+ if (BECount.Exact != getCouldNotCompute()) {
+ assert(isLoopInvariant(BECount.Exact, L) &&
+ isLoopInvariant(BECount.Max, L) &&
+ "Computed backedge-taken count isn't loop invariant for loop!");
+ ++NumTripCountsComputed;
+
+ // Update the value in the map.
+ Pair.first->second = BECount;
+ } else {
+ if (BECount.Max != getCouldNotCompute())
// Update the value in the map.
Pair.first->second = BECount;
- } else {
- if (BECount.Max != getCouldNotCompute())
- // Update the value in the map.
- Pair.first->second = BECount;
- if (isa<PHINode>(L->getHeader()->begin()))
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
- }
-
- // Now that we know more about the trip count for this loop, forget any
- // existing SCEV values for PHI nodes in this loop since they are only
- // conservative estimates made without the benefit of trip count
- // information. This is similar to the code in forgetLoop, except that
- // it handles SCEVUnknown PHI nodes specially.
- if (BECount.hasAnyInfo()) {
- SmallVector<Instruction *, 16> Worklist;
- PushLoopPHIs(L, Worklist);
-
- SmallPtrSet<Instruction *, 8> Visited;
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
-
- ValueExprMapType::iterator It =
- ValueExprMap.find(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- // SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, or it's a PHI that's in the progress of being computed
- // by createNodeForPHI. In the former case, additional loop trip
- // count information isn't going to change anything. In the later
- // case, createNodeForPHI will perform the necessary updates on its
- // own when it gets to that point.
- if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
- ValuesAtScopes.erase(It->second);
- ValueExprMap.erase(It);
- }
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
+ }
+
+ // Now that we know more about the trip count for this loop, forget any
+ // existing SCEV values for PHI nodes in this loop since they are only
+ // conservative estimates made without the benefit of trip count
+ // information. This is similar to the code in forgetLoop, except that
+ // it handles SCEVUnknown PHI nodes specially.
+ if (BECount.hasAnyInfo()) {
+ SmallVector<Instruction *, 16> Worklist;
+ PushLoopPHIs(L, Worklist);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ const SCEV *Old = It->second;
+
+ // SCEVUnknown for a PHI either means that it has an unrecognized
+ // structure, or it's a PHI that's in the process of being computed
+ // by createNodeForPHI. In the former case, additional loop trip
+ // count information isn't going to change anything. In the latter
+ // case, createNodeForPHI will perform the necessary updates on its
+ // own when it gets to that point.
+ if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ forgetMemoizedResults(Old);
+ ValueExprMap.erase(It);
}
-
- PushDefUseChildren(I, Worklist);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
}
+
+ PushDefUseChildren(I, Worklist);
}
}
return Pair.first->second;
@@ -3774,7 +3775,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
- ValuesAtScopes.erase(It->second);
+ forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -3782,6 +3783,11 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
PushDefUseChildren(I, Worklist);
}
+
+ // Forget all contained loops too, to avoid dangling entries in the
+ // ValuesAtScopes map.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ forgetLoop(*I);
}
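Client usage is unchanged (sketch; the transform is hypothetical): a pass that restructures or deletes a loop still calls forgetLoop, and nested loops are now invalidated automatically.

// After unrolling (or otherwise rewriting) L, drop ScalarEvolution's cached
// trip counts and value mappings for L and, with this change, its subloops.
SE.forgetLoop(L);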
/// forgetValue - This method should be called by the client when it has
@@ -3802,7 +3808,7 @@ void ScalarEvolution::forgetValue(Value *V) {
ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
- ValuesAtScopes.erase(It->second);
+ forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -4016,6 +4022,105 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
+static const SCEVAddRecExpr *
+isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) {
+ const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S);
+
+ // The SCEV must be an addrec of this loop.
+ if (!SA || SA->getLoop() != L || !SA->isAffine())
+ return 0;
+
+ // The SCEV must be known to not wrap in some way to be interesting.
+ if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap())
+ return 0;
+
+ // The stride must be a constant so that we know if it is striding up or down.
+ if (!isa<SCEVConstant>(SA->getOperand(1)))
+ return 0;
+ return SA;
+}
+
+/// getMinusSCEVForExitTest - When considering a loop with an "x != y" exit
+/// test, we turn the test into a computation that evaluates x-y != 0, and this
+/// function returns the expression to use for x-y. We know and take advantage
+/// of the fact that this subtraction is only used in a comparison-against-zero
+/// context.
+///
+static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, ScalarEvolution &SE) {
+ // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not
+ // wrap (either NSW or NUW), then we know that the value will either become
+ // the other one (and thus the loop terminates), that the loop will terminate
+ // through some other exit condition first, or that the loop has undefined
+ // behavior. This information is useful when the addrec has a stride that is
+ // != 1 or -1, because it means we can't "miss" the exit value.
+ //
+ // In any of these three cases, it is safe to turn the exit condition into a
+ // "counting down" AddRec (to zero) by subtracting the two inputs as normal,
+ // but since we know that the "end cannot be missed" we can force the
+ // resulting AddRec to be a NUW addrec. Since it is counting down, this means
+ // that the AddRec *cannot* pass zero.
+
+ // See if LHS and RHS are addrec's we can handle.
+ const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L);
+ const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L);
+
+ // If neither addrec is interesting, just return a minus.
+ if (RHSA == 0 && LHSA == 0)
+ return SE.getMinusSCEV(LHS, RHS);
+
+ // If only one of LHS and RHS is an AddRec of this loop, make sure it is LHS.
+ if (RHSA && LHSA == 0) {
+ // Safe because a-b === b-a for comparisons against zero.
+ std::swap(LHS, RHS);
+ std::swap(LHSA, RHSA);
+ }
+
+ // Handle the case when only one is advancing in a non-overflowing way.
+ if (RHSA == 0) {
+ // If RHS is loop varying, then we can't predict when LHS will cross it.
+ if (!SE.isLoopInvariant(RHS, L))
+ return SE.getMinusSCEV(LHS, RHS);
+
+ // If LHS has a positive stride, then we compute RHS-LHS, because the loop
+ // is counting up until it crosses RHS (which must be larger than LHS). If
+ // it is negative, we compute LHS-RHS because we're counting down to RHS.
+ const ConstantInt *Stride =
+ cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+ if (Stride->getValue().isNegative())
+ std::swap(LHS, RHS);
+
+ return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/);
+ }
+
+ // If both LHS and RHS are interesting, we have something like:
+ // a+i*4 != b+i*8.
+ const ConstantInt *LHSStride =
+ cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+ const ConstantInt *RHSStride =
+ cast<SCEVConstant>(RHSA->getOperand(1))->getValue();
+
+ // If the strides are equal, then this is just a (complex) loop invariant
+ // comparison of a and b.
+ if (LHSStride == RHSStride)
+ return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart());
+
+ // If the signs of the strides differ, then the negative stride is counting
+ // down to the positive stride.
+ if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){
+ if (RHSStride->getValue().isNegative())
+ std::swap(LHS, RHS);
+ } else {
+ // If LHS's stride is smaller than RHS's stride, then "b" must be less than
+ // "a" and "b" is RHS is counting up (catching up) to LHS. This is true
+ // whether the strides are positive or negative.
+ if (RHSStride->getValue().slt(LHSStride->getValue()))
+ std::swap(LHS, RHS);
+ }
+
+ return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/);
+}
+
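A worked example of the simplest case handled above (the loop, names, and constants are illustrative): for `for (i = 0; i != n; i += 4)` with i known not to wrap unsigned, the helper rewrites the exit test as a count-down to zero.

// LHS = {0,+,4}<nuw> (the induction variable), RHS = %n (loop invariant), so
// LHSA is the addrec, RHSA is null, and the stride is positive. The helper
// therefore returns getMinusSCEV(RHS, LHS, /*HasNUW=*/true), conceptually the
// count-down recurrence {%n,+,-4} tagged no-unsigned-wrap, and HowFarToZero's
// new NUW path below can conclude the trip count is "%n /u 4".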
/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
@@ -4050,7 +4155,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// At this point, we would like to compute how many iterations of the
// loop the predicate will return true for these inputs.
- if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
+ if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
// If there is a loop-invariant, force it into the RHS.
std::swap(LHS, RHS);
Cond = ICmpInst::getSwappedPredicate(Cond);
@@ -4075,7 +4180,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L,
+ *this), L);
if (BTI.hasAnyInfo()) return BTI;
break;
}
@@ -4212,7 +4318,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
// We can only recognize very limited forms of loop index expressions, in
// particular, only affine AddRec's like {C1,+,C2}.
const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
- if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) ||
+ if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
!isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
!isa<SCEVConstant>(IdxExpr->getOperand(1)))
return getCouldNotCompute();
@@ -4686,7 +4792,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
// bit width during computations.
APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D
APInt Mod(BW + 1, 0);
- Mod.set(BW - Mult2); // Mod = N / D
+ Mod.setBit(BW - Mult2); // Mod = N / D
APInt I = AD.multiplicativeInverse(Mod);
// 4. Compute the minimum unsigned root of the equation:
@@ -4778,58 +4884,26 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
- if (AddRec->isAffine()) {
- // If this is an affine expression, the execution count of this branch is
- // the minimum unsigned root of the following equation:
- //
- // Start + Step*N = 0 (mod 2^BW)
- //
- // equivalent to:
- //
- // Step*N = -Start (mod 2^BW)
- //
- // where BW is the common bit width of Start and Step.
-
- // Get the initial value for the loop.
- const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
- L->getParentLoop());
- const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
- L->getParentLoop());
-
- if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
- // For now we handle only constant steps.
-
- // First, handle unitary steps.
- if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
- return getNegativeSCEV(Start); // N = -Start (as unsigned)
- if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
- return Start; // N = Start (as unsigned)
-
- // Then, try to solve the above equation provided that Start is constant.
- if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
- return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
- -StartC->getValue()->getValue(),
- *this);
- }
- } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
- // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
- // the quadratic equation to solve it.
- std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
- *this);
+ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
+ // the quadratic equation to solve it.
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
+ std::pair<const SCEV *,const SCEV *> Roots =
+ SolveQuadraticEquation(AddRec, *this);
const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
- if (R1) {
+ if (R1 && R2) {
#if 0
dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1
<< " sol#2: " << *R2 << "\n";
#endif
// Pick the smallest positive root value.
if (ConstantInt *CB =
- dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
- R1->getValue(), R2->getValue()))) {
+ dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
+ R1->getValue(),
+ R2->getValue()))) {
if (CB->getZExtValue() == false)
std::swap(R1, R2); // R1 is the minimum root now.
-
+
// We can only use this value if the chrec ends up with an exact zero
// value at this index. When solving for "X*X != 5", for example, we
// should not accept a root of 2.
@@ -4838,8 +4912,54 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
return R1; // We found a quadratic root!
}
}
+ return getCouldNotCompute();
}
+ // Otherwise we can only handle this if it is affine.
+ if (!AddRec->isAffine())
+ return getCouldNotCompute();
+
+ // If this is an affine expression, the execution count of this branch is
+ // the minimum unsigned root of the following equation:
+ //
+ // Start + Step*N = 0 (mod 2^BW)
+ //
+ // equivalent to:
+ //
+ // Step*N = -Start (mod 2^BW)
+ //
+ // where BW is the common bit width of Start and Step.
+
+ // Get the initial value for the loop.
+ const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
+ const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+
+ // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up
+ // to wrap to 0; it must be counting down to equal 0. Also, while counting
+ // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what
+ // the stride is. As such, NUW addrecs will always become zero in
+ // "start / -stride" steps, and we know that the division is exact.
+ if (AddRec->hasNoUnsignedWrap())
+ // FIXME: We really want an "isexact" bit for udiv.
+ return getUDivExpr(Start, getNegativeSCEV(Step));
+
+ // For now we handle only constant steps.
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
+ if (StepC == 0)
+ return getCouldNotCompute();
+
+ // First, handle unitary steps.
+ if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
+ return getNegativeSCEV(Start); // N = -Start (as unsigned)
+
+ if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
+ return Start; // N = Start (as unsigned)
+
+ // Then, try to solve the above equation provided that Start is constant.
+ if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
+ return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
+ -StartC->getValue()->getValue(),
+ *this);
return getCouldNotCompute();
}
@@ -4939,7 +5059,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// as both operands could be addrecs loop-invariant in each other's loop.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) {
const Loop *L = AR->getLoop();
- if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) {
+ if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
Changed = true;
@@ -5159,13 +5279,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
trivially_true:
// Return 0 == 0.
- LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = ICmpInst::ICMP_EQ;
return true;
trivially_false:
// Return 0 != 0.
- LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0);
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
Pred = ICmpInst::ICMP_NE;
return true;
}
@@ -5556,7 +5676,7 @@ ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool isSigned) {
// Only handle: "ADDREC < LoopInvariant".
- if (!RHS->isLoopInvariant(L)) return getCouldNotCompute();
+ if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
if (!AddRec || AddRec->getLoop() != L)
@@ -5836,6 +5956,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
ScalarEvolution::ScalarEvolution()
: FunctionPass(ID), FirstUnknown(0) {
+ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolution::runOnFunction(Function &F) {
@@ -5857,6 +5978,10 @@ void ScalarEvolution::releaseMemory() {
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
UniqueSCEVs.clear();
SCEVAllocator.Reset();
}
@@ -5936,7 +6061,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
if (L) {
OS << "\t\t" "Exits: ";
const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
- if (!ExitValue->isLoopInvariant(L)) {
+ if (!SE.isLoopInvariant(ExitValue, L)) {
OS << "<<Unknown>>";
} else {
OS << *ExitValue;
@@ -5953,3 +6078,240 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
PrintLoopInfo(OS, &SE, *I);
}
+ScalarEvolution::LoopDisposition
+ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
+ std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
+ std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
+ Values.insert(std::make_pair(L, LoopVariant));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ LoopDisposition D = computeLoopDisposition(S, L);
+ return LoopDispositions[S][L] = D;
+}
+
+ScalarEvolution::LoopDisposition
+ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return LoopInvariant;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+
+ // If L is the addrec's loop, it's computable.
+ if (AR->getLoop() == L)
+ return LoopComputable;
+
+ // Add recurrences are never invariant in the function-body (null loop).
+ if (!L)
+ return LoopVariant;
+
+ // This recurrence is variant w.r.t. L if L contains AR's loop.
+ if (L->contains(AR->getLoop()))
+ return LoopVariant;
+
+ // This recurrence is invariant w.r.t. L if AR's loop contains L.
+ if (AR->getLoop()->contains(L))
+ return LoopInvariant;
+
+ // This recurrence is variant w.r.t. L if any of its operands
+ // are variant.
+ for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I)
+ if (!isLoopInvariant(*I, L))
+ return LoopVariant;
+
+ // Otherwise it's loop-invariant.
+ return LoopInvariant;
+ }
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool HasVarying = false;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ LoopDisposition D = getLoopDisposition(*I, L);
+ if (D == LoopVariant)
+ return LoopVariant;
+ if (D == LoopComputable)
+ HasVarying = true;
+ }
+ return HasVarying ? LoopComputable : LoopInvariant;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
+ if (LD == LoopVariant)
+ return LoopVariant;
+ LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
+ if (RD == LoopVariant)
+ return LoopVariant;
+ return (LD == LoopInvariant && RD == LoopInvariant) ?
+ LoopInvariant : LoopComputable;
+ }
+ case scUnknown:
+ // All non-instruction values are loop invariant. All instructions are loop
+ // invariant if they are not contained in the specified loop.
+ // Instructions are never considered invariant in the function body
+ // (null loop) because they are defined within the "loop".
+ if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+ return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
+ return LoopInvariant;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return LoopVariant;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return LoopVariant;
+}
+
+bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopInvariant;
+}
+
+bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
+ return getLoopDisposition(S, L) == LoopComputable;
+}
+
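Callers migrate from the removed per-SCEV virtuals to these memoized queries; a minimal sketch (S and L are whatever expression and loop the caller is examining):

// Previously: S->isLoopInvariant(L) and S->hasComputableLoopEvolution(L).
bool Inv = SE.isLoopInvariant(S, L);               // LoopInvariant
bool Evo = SE.hasComputableLoopEvolution(S, L);    // LoopComputable
ScalarEvolution::LoopDisposition D = SE.getLoopDisposition(S, L);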
+ScalarEvolution::BlockDisposition
+ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
+ std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
+ Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ BlockDisposition D = computeBlockDisposition(S, BB);
+ return BlockDispositions[S][BB] = D;
+}
+
+ScalarEvolution::BlockDisposition
+ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return ProperlyDominatesBlock;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
+ case scAddRecExpr: {
+ // This uses a "dominates" query instead of "properly dominates" query
+ // to test for proper dominance too, because the instruction which
+ // produces the addrec's value is a PHI, and a PHI effectively properly
+ // dominates its entire containing block.
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
+ if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+ return DoesNotDominateBlock;
+ }
+ // FALL THROUGH into SCEVNAryExpr handling.
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ bool Proper = true;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ BlockDisposition D = getBlockDisposition(*I, BB);
+ if (D == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ if (D == DominatesBlock)
+ Proper = false;
+ }
+ return Proper ? ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ BlockDisposition LD = getBlockDisposition(LHS, BB);
+ if (LD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ BlockDisposition RD = getBlockDisposition(RHS, BB);
+ if (RD == DoesNotDominateBlock)
+ return DoesNotDominateBlock;
+ return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
+ ProperlyDominatesBlock : DominatesBlock;
+ }
+ case scUnknown:
+ if (Instruction *I =
+ dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
+ if (I->getParent() == BB)
+ return DominatesBlock;
+ if (DT->properlyDominates(I->getParent(), BB))
+ return ProperlyDominatesBlock;
+ return DoesNotDominateBlock;
+ }
+ return ProperlyDominatesBlock;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return DoesNotDominateBlock;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return DoesNotDominateBlock;
+}
+
+bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) >= DominatesBlock;
+}
+
+bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
+ return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ switch (S->getSCEVType()) {
+ case scConstant:
+ return false;
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
+ const SCEV *CastOp = Cast->getOperand();
+ return Op == CastOp || hasOperand(CastOp, Op);
+ }
+ case scAddRecExpr:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr: {
+ const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ const SCEV *NAryOp = *I;
+ if (NAryOp == Op || hasOperand(NAryOp, Op))
+ return true;
+ }
+ return false;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
+ const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
+ return LHS == Op || hasOperand(LHS, Op) ||
+ RHS == Op || hasOperand(RHS, Op);
+ }
+ case scUnknown:
+ return false;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ return false;
+ default: break;
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+ return false;
+}
+
+void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 93b2a8b..e9edb3e 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,7 +34,10 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {}
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ initializeScalarEvolutionAliasAnalysisPass(
+ *PassRegistry::getPassRegistry());
+ }
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -49,8 +52,7 @@ namespace {
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnFunction(Function &F);
- virtual AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
Value *GetBaseValue(const SCEV *S);
};
@@ -58,8 +60,11 @@ namespace {
// Register this pass...
char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
- "ScalarEvolution-based Alias Analysis", false, true, false);
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false)
FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
return new ScalarEvolutionAliasAnalysis();
@@ -101,17 +106,17 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
}
AliasAnalysis::AliasResult
-ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
- const Value *B, unsigned BSize) {
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
- if (ASize == 0 || BSize == 0)
+ if (LocA.Size == 0 || LocB.Size == 0)
return NoAlias;
// This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
- const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
- const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
// If they evaluate to the same expression, it's a MustAlias.
if (AS == BS) return MustAlias;
@@ -121,8 +126,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
if (SE->getEffectiveSCEVType(AS->getType()) ==
SE->getEffectiveSCEVType(BS->getType())) {
unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
- APInt ASizeInt(BitWidth, ASize);
- APInt BSizeInt(BitWidth, BSize);
+ APInt ASizeInt(BitWidth, LocA.Size);
+ APInt BSizeInt(BitWidth, LocB.Size);
// Compute the difference between the two pointers.
const SCEV *BA = SE->getMinusSCEV(BS, AS);
@@ -154,11 +159,15 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
// inttoptr and ptrtoint operators.
Value *AO = GetBaseValue(AS);
Value *BO = GetBaseValue(BS);
- if ((AO && AO != A) || (BO && BO != B))
- if (alias(AO ? AO : A, AO ? UnknownSize : ASize,
- BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias)
+ if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+ if (alias(Location(AO ? AO : LocA.Ptr,
+ AO ? +UnknownSize : LocA.Size,
+ AO ? 0 : LocA.TBAATag),
+ Location(BO ? BO : LocB.Ptr,
+ BO ? +UnknownSize : LocB.Size,
+ BO ? 0 : LocB.TBAATag)) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
- return AliasAnalysis::alias(A, ASize, B, BSize);
+ return AliasAnalysis::alias(LocA, LocB);
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 66a06ae..b7c110f 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -608,15 +608,22 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
return A; // Arbitrarily break the tie.
}
-/// GetRelevantLoop - Get the most relevant loop associated with the given
+/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
-static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
- DominatorTree &DT) {
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+ // Test whether we've already computed the most relevant loop for this SCEV.
+ std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+ RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ if (!Pair.second)
+ return Pair.first->second;
+
if (isa<SCEVConstant>(S))
+ // A constant has no relevant loops.
return 0;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
- return LI.getLoopFor(I->getParent());
+ return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+ // A non-instruction has no relevant loops.
return 0;
}
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
@@ -625,16 +632,22 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
L = AR->getLoop();
for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
I != E; ++I)
- L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT);
- return L;
+ L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+ return RelevantLoops[N] = L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+ const Loop *Result = getRelevantLoop(C->getOperand());
+ return RelevantLoops[C] = Result;
+ }
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const Loop *Result =
+ PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+ getRelevantLoop(D->getRHS()),
+ *SE.DT);
+ return RelevantLoops[D] = Result;
}
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return GetRelevantLoop(C->getOperand(), LI, DT);
- if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S))
- return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT),
- GetRelevantLoop(D->getRHS(), LI, DT),
- DT);
llvm_unreachable("Unexpected SCEV type!");
+ return 0;
}
namespace {
@@ -682,8 +695,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
- *I));
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
@@ -752,8 +764,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
- *I));
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants.
std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
@@ -990,7 +1001,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Strip off any non-loop-dominating component from the addrec start.
const SCEV *Start = Normalized->getStart();
const SCEV *PostLoopOffset = 0;
- if (!Start->properlyDominates(L->getHeader(), SE.DT)) {
+ if (!SE.properlyDominates(Start, L->getHeader())) {
PostLoopOffset = Start;
Start = SE.getConstant(Normalized->getType(), 0);
Normalized =
@@ -1002,7 +1013,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Strip off any non-loop-dominating component from the addrec step.
const SCEV *Step = Normalized->getStepRecurrence(SE);
const SCEV *PostLoopScale = 0;
- if (!Step->dominates(L->getHeader(), SE.DT)) {
+ if (!SE.dominates(Step, L->getHeader())) {
PostLoopScale = Step;
Step = SE.getConstant(Normalized->getType(), 1);
Normalized =
@@ -1278,7 +1289,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
Instruction *InsertPt = Builder.GetInsertPoint();
for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
L = L->getParentLoop())
- if (S->isLoopInvariant(L)) {
+ if (SE.isLoopInvariant(S, L)) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
@@ -1286,7 +1297,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
- if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L))
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbfdcec..40e18ab 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -12,29 +12,65 @@
//
// In LLVM IR, memory does not have types, so LLVM's own type system is not
// suitable for doing TBAA. Instead, metadata is added to the IR to describe
-// a type system of a higher level language.
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
//
-// This pass is language-independent. The type system is encoded in
-// metadata. This allows this pass to support typical C and C++ TBAA, but
-// it can also support custom aliasing behavior for other languages.
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
//
-// This is a work-in-progress. It doesn't work yet, and the metadata
-// format isn't stable.
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
//
-// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to
-// be extended.
-// TODO: AA chaining
-// TODO: struct fields
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which, if equal to 1,
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
//
//===----------------------------------------------------------------------===//
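As an illustrative aside (not part of this patch), a front end emitting the scheme described above attaches the nodes to individual memory operations through !tbaa tags, for example:

  %i = load i32* %p, !tbaa !1
  %f = load float* %q, !tbaa !2

Because !1 ("int") and !2 ("float") are sibling leaves under the same root !0, the pass below can report NoAlias for these two accesses.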
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Metadata.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
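A minimal usage sketch (the surrounding flags are only illustrative): invoking

  opt -enable-tbaa=false -basicaa -gvn input.bc -o output.bc

makes every TBAA query defined below chain straight to the next alias analysis, which is a quick way to test whether a miscompile depends on !tbaa metadata without editing the IR.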
+
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
@@ -44,16 +80,16 @@ namespace {
public:
TBAANode() : Node(0) {}
- explicit TBAANode(MDNode *N) : Node(N) {}
+ explicit TBAANode(const MDNode *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
const MDNode *getNode() const { return Node; }
- /// getParent - Get this TBAANode's Alias DAG parent.
+ /// getParent - Get this TBAANode's Alias tree parent.
TBAANode getParent() const {
if (Node->getNumOperands() < 2)
return TBAANode();
- MDNode *P = dyn_cast<MDNode>(Node->getOperand(1));
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAANode();
// Ok, this node has a valid parent. Return it.
@@ -69,8 +105,7 @@ namespace {
ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
if (!CI)
return false;
- // TODO: Think about the encoding.
- return CI->isOne();
+ return CI->getValue()[0];
}
};
}
@@ -82,7 +117,13 @@ namespace {
public AliasAnalysis {
public:
static char ID; // Class identification, replacement for typeinfo
- TypeBasedAliasAnalysis() : ImmutablePass(ID) {}
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -94,18 +135,25 @@ namespace {
return this;
}
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
- virtual bool pointsToConstantMemory(const Value *P);
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
};
} // End of anonymous namespace
// Register this pass...
char TypeBasedAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
- "Type-Based Alias Analysis", false, true, false);
+ "Type-Based Alias Analysis", false, true, false)
ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
return new TypeBasedAliasAnalysis();
@@ -117,34 +165,19 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
-AliasAnalysis::AliasResult
-TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
- const Value *B, unsigned BSize) {
- // Currently, metadata can only be attached to Instructions.
- const Instruction *AI = dyn_cast<Instruction>(A);
- if (!AI) return MayAlias;
- const Instruction *BI = dyn_cast<Instruction>(B);
- if (!BI) return MayAlias;
-
- // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
- // be conservative.
- MDNode *AM =
- AI->getMetadata(AI->getParent()->getParent()->getParent()
- ->getMDKindID("tbaa"));
- if (!AM) return MayAlias;
- MDNode *BM =
- BI->getMetadata(BI->getParent()->getParent()->getParent()
- ->getMDKindID("tbaa"));
- if (!BM) return MayAlias;
-
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
// Keep track of the root node for A and B.
TBAANode RootA, RootB;
- // Climb the DAG from A to see if we reach B.
- for (TBAANode T(AM); ; ) {
- if (T.getNode() == BM)
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
// B is an ancestor of A.
- return MayAlias;
+ return true;
RootA = T;
T = T.getParent();
@@ -152,11 +185,11 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
break;
}
- // Climb the DAG from B to see if we reach A.
- for (TBAANode T(BM); ; ) {
- if (T.getNode() == AM)
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
// A is an ancestor of B.
- return MayAlias;
+ return true;
RootB = T;
T = T.getParent();
@@ -166,26 +199,101 @@ TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
// Neither node is an ancestor of the other.
- // If they have the same root, then we've proved there's no alias.
- if (RootA.getNode() == RootB.getNode())
- return NoAlias;
-
// If they have different roots, they're part of different potentially
// unrelated type systems, so we must be conservative.
- return MayAlias;
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
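To make the traversal concrete, take the example nodes from the file header: Aliases(!1, !2) climbs from "int" to the root !0 without meeting "float", then from "float" to !0 without meeting "int"; the roots match, so it returns false and the caller can report NoAlias. Aliases(!3, !2) finds !2 ("float") as an ancestor of !3 ("const float") on the first climb and returns true.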
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
}
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) {
- // Currently, metadata can only be attached to Instructions.
- const Instruction *I = dyn_cast<Instruction>(P);
- if (!I) return false;
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- MDNode *M =
- I->getMetadata(I->getParent()->getParent()->getParent()
- ->getMDKindID("tbaa"));
- if (!M) return false;
+ const MDNode *M = Loc.TBAATag;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- return TBAANode(M).TypeIsImmutable();
+ if (TBAANode(M).TypeIsImmutable())
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (TBAANode(M).TypeIsImmutable())
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 181c9b0..1060bc5 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/GlobalVariable.h"
@@ -23,9 +24,22 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <cstring>
using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+ return TD ? TD->getPointerSizeInBits() : 0;
+}
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
@@ -46,7 +60,6 @@ using namespace llvm;
void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt &KnownZero, APInt &KnownOne,
const TargetData *TD, unsigned Depth) {
- const unsigned MaxDepth = 6;
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
@@ -69,14 +82,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) ||
isa<ConstantAggregateZero>(V)) {
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = Mask;
return;
}
// Handle a constant vector by taking the intersection of the known bits of
// each element.
if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- KnownZero.set(); KnownOne.set();
+ KnownZero.setAllBits(); KnownOne.setAllBits();
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
@@ -103,15 +116,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
CountTrailingZeros_32(Align));
else
- KnownZero.clear();
- KnownOne.clear();
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
return;
}
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
// the bits of its aliasee.
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (GA->mayBeOverridden()) {
- KnownZero.clear(); KnownOne.clear();
+ KnownZero.clearAllBits(); KnownOne.clearAllBits();
} else {
ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
TD, Depth+1);
@@ -119,7 +132,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
- KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything.
+ KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything.
if (Depth == MaxDepth || Mask == 0)
return; // Limit search depth.
@@ -185,7 +198,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clear();
+ KnownOne.clearAllBits();
unsigned TrailZ = KnownZero.countTrailingOnes() +
KnownZero2.countTrailingOnes();
unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -208,8 +221,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clear();
- KnownZero2.clear();
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
ComputeMaskedBits(I->getOperand(1),
AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -255,14 +268,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
else
SrcBitWidth = SrcTy->getScalarSizeInBits();
- APInt MaskIn(Mask);
- MaskIn.zextOrTrunc(SrcBitWidth);
- KnownZero.zextOrTrunc(SrcBitWidth);
- KnownOne.zextOrTrunc(SrcBitWidth);
+ APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
+ KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
+ KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
Depth+1);
- KnownZero.zextOrTrunc(BitWidth);
- KnownOne.zextOrTrunc(BitWidth);
+ KnownZero = KnownZero.zextOrTrunc(BitWidth);
+ KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
if (BitWidth > SrcBitWidth)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -284,15 +296,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- APInt MaskIn(Mask);
- MaskIn.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ APInt MaskIn = Mask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
@@ -338,7 +349,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
// Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt Mask2(Mask.shl(ShiftAmt));
@@ -474,7 +485,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
break;
}
@@ -579,6 +590,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
}
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ return;
+
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
@@ -621,6 +636,156 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
}
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+ if (!BitWidth) {
+ KnownZero = false;
+ KnownOne = false;
+ return;
+ }
+ APInt ZeroBits(BitWidth, 0);
+ APInt OneBits(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+ Depth);
+ KnownOne = OneBits[BitWidth - 1];
+ KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+
+ // 1 << X is clearly a power of two if the one is not shifted off the end. If
+ // it is shifted off the end then the result is undefined.
+ if (match(V, m_Shl(m_One(), m_Value())))
+ return true;
+
+ // (signbit) >>l X is clearly a power of two if the one is not shifted off the
+ // bottom. If it is shifted off the bottom then the result is undefined.
+ if (match(V, m_LShr(m_SignBit(), m_Value())))
+ return true;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+ return false;
+}
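For example (hypothetical values): a value defined as shl i32 1, %n matches the first pattern above; zext i32 %v to i64 of a known power of two is accepted through the recursion; and a select is accepted only when both arms qualify. Anything else conservatively returns false.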
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+ // TODO: Handle vectors
+ return false;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+ // X | Y != 0 if X != 0 or Y != 0.
+ Value *X = 0, *Y = 0;
+ if (match(V, m_Or(m_Value(X), m_Value(Y))))
+ return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+ // ext X != 0 if X != 0.
+ if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+ // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+ // if the lowest bit is shifted off the end.
+ if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ if (KnownOne[0])
+ return true;
+ }
+ // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+ // defined if the sign bit is shifted off the end.
+ else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ if (XKnownNegative)
+ return true;
+ }
+ // X + Y.
+ else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+ bool XKnownNonNegative, XKnownNegative;
+ bool YKnownNonNegative, YKnownNegative;
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnownNonNegative && YKnownNonNegative)
+ if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ return true;
+
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
+ if (BitWidth && XKnownNegative && YKnownNegative) {
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ if ((KnownOne & Mask) != 0)
+ return true;
+ }
+
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ return true;
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ return true;
+ }
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
+ else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ return true;
+ }
+
+ if (!BitWidth) return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+ TD, Depth);
+ return KnownOne != 0;
+}
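A couple of hypothetical cases to ground the recursion: or i32 %x, 1 is proven non-zero because one operand is a non-zero constant; shl i32 %x, %y is proven non-zero when the low bit of %x is known to be one; and when none of the structural rules fire, the final ComputeMaskedBits call still succeeds whenever any bit of the value is known to be one.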
+
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
@@ -679,6 +844,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
Tmp += C->getZExtValue();
if (Tmp > TyBits) Tmp = TyBits;
}
+ // vector ashr X, <C, C, C, C> -> adds C sign bits
+ if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+ Tmp += CI->getZExtValue();
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ }
return Tmp;
case Instruction::Shl:
if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
@@ -875,8 +1047,9 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
// Turn Op0 << Op1 into Op0 * 2^Op1
APInt Op1Int = Op1CI->getValue();
uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
- Op1 = ConstantInt::get(V->getContext(),
- APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+ APInt API(Op1Int.getBitWidth(), 0);
+ API.setBit(BitToSet);
+ Op1 = ConstantInt::get(V->getContext(), API);
}
Value *Mul0 = NULL;
@@ -982,6 +1155,80 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with. This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+ // All byte-wide stores are splatable, even of arbitrary variables.
+ if (V->getType()->isIntegerTy(8)) return V;
+
+ // Handle 'null', ConstantAggregateZero, etc.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+ // Constant float and double values can be handled as integer values if the
+ // corresponding integer value is "byteable". An important case is 0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+ if (CFP->getType()->isDoubleTy())
+ V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ // Don't handle long double formats, which have strange constraints.
+ }
+
+ // We can handle constant integers that are a power of two in size and a
+ // multiple of 8 bits.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ unsigned Width = CI->getBitWidth();
+ if (isPowerOf2_32(Width) && Width > 8) {
+ // We can handle this value if the recursive binary decomposition is the
+ // same at all levels.
+ APInt Val = CI->getValue();
+ APInt Val2;
+ while (Val.getBitWidth() != 8) {
+ unsigned NextWidth = Val.getBitWidth()/2;
+ Val2 = Val.lshr(NextWidth);
+ Val2 = Val2.trunc(Val.getBitWidth()/2);
+ Val = Val.trunc(Val.getBitWidth()/2);
+
+ // If the top/bottom halves aren't the same, reject it.
+ if (Val != Val2)
+ return 0;
+ }
+ return ConstantInt::get(V->getContext(), Val);
+ }
+ }
+
+ // A ConstantArray is splatable if all its members are equal and also
+ // splatable.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ if (CA->getNumOperands() == 0)
+ return 0;
+
+ Value *Val = isBytewiseValue(CA->getOperand(0));
+ if (!Val)
+ return 0;
+
+ for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+ if (CA->getOperand(I-1) != CA->getOperand(I))
+ return 0;
+
+ return Val;
+ }
+
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return 0;
+}
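A short worked example of the halving loop: i32 0xAAAAAAAA splits into 0xAAAA/0xAAAA and then 0xAA/0xAA, so all halves agree and i8 -86 (0xAA) is returned; i16 0x1234 splits into 0x12 versus 0x34 on the first step, so the function returns null, matching the i16 0x1234 case mentioned in the comment above.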
+
+
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -1159,6 +1406,47 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
return 0;
}
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData &TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+ // Handle struct and array indices, which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
+ unsigned PtrSize = TD.getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
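A hedged example (exact offsets depend on the TargetData in use): for getelementptr %struct.X* %x, i32 0, i32 1 into the struct X shown in the TBAA comment earlier, a typical layout places the int member at byte 8, so the call returns %x and adds 8 to Offset. The final shift pair simply sign-extends Offset from the pointer width to 64 bits so that negative offsets computed on a 32-bit target are preserved.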
+
+
/// GetConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false.
@@ -1386,3 +1674,32 @@ uint64_t llvm::GetStringLength(Value *V) {
// an empty string as a length.
return Len == ~0ULL ? 1 : Len;
}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ // See if InstructionSimplify knows any relevant tricks.
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ // TODO: Acquire a DominatorTree and use it.
+ if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ V = Simplified;
+ continue;
+ }
+
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
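A small illustration with hypothetical IR (not from this patch): starting from %q in

  %p = getelementptr [10 x i32]* @g, i64 0, i64 %i
  %q = bitcast i32* %p to i8*

the walk peels the bitcast and then the GEP and returns the global @g, stopping early only if @g were an overridable alias or the MaxLookup budget ran out.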
diff --git a/contrib/llvm/lib/Archive/Archive.cpp b/contrib/llvm/lib/Archive/Archive.cpp
index 54c715c..1eab27d 100644
--- a/contrib/llvm/lib/Archive/Archive.cpp
+++ b/contrib/llvm/lib/Archive/Archive.cpp
@@ -15,8 +15,10 @@
#include "ArchiveInternals.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Module.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
#include <memory>
#include <cstring>
using namespace llvm;
@@ -65,8 +67,9 @@ ArchiveMember::ArchiveMember(Archive* PAR)
// different file, presumably as an update to the member. It also makes sure
// the flags are reset correctly.
bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
- if (!newFile.exists()) {
- if (ErrMsg)
+ bool Exists;
+ if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+ if (ErrMsg)
*ErrMsg = "Can not replace an archive member with a non-existent file";
return true;
}
@@ -113,11 +116,10 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
// Get the signature and status info
const char* signature = (const char*) data;
- std::string magic;
+ SmallString<4> magic;
if (!signature) {
- path.getMagicNumber(magic,4);
+ sys::fs::get_magic(path.str(), magic.capacity(), magic);
signature = magic.c_str();
- std::string err;
const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
if (FSinfo)
info = *FSinfo;
@@ -147,9 +149,13 @@ Archive::Archive(const sys::Path& filename, LLVMContext& C)
bool
Archive::mapToMemory(std::string* ErrMsg) {
- mapfile = MemoryBuffer::getFile(archPath.c_str(), ErrMsg);
- if (mapfile == 0)
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
return true;
+ }
+ mapfile = File.take();
base = mapfile->getBufferStart();
return false;
}
@@ -159,19 +165,19 @@ void Archive::cleanUpMemory() {
delete mapfile;
mapfile = 0;
base = 0;
-
+
// Forget the entire symbol table
symTab.clear();
symTabSize = 0;
-
+
firstFileOffset = 0;
-
+
// Free the foreign symbol table member
if (foreignST) {
delete foreignST;
foreignST = 0;
}
-
+
// Delete any Modules and ArchiveMember's we've allocated as a result of
// symbol table searches.
for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) {
@@ -193,7 +199,7 @@ static void getSymbols(Module*M, std::vector<std::string>& symbols) {
if (!GI->isDeclaration() && !GI->hasLocalLinkage())
if (!GI->getName().empty())
symbols.push_back(GI->getName());
-
+
// Loop over functions
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
if (!FI->isDeclaration() && !FI->hasLocalLinkage())
@@ -213,20 +219,20 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName,
LLVMContext& Context,
std::vector<std::string>& symbols,
std::string* ErrMsg) {
- std::auto_ptr<MemoryBuffer> Buffer(
- MemoryBuffer::getFileOrSTDIN(fName.c_str()));
- if (!Buffer.get()) {
- if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'";
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+ if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+ + ec.message();
return true;
}
-
+
Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
if (!M)
return true;
-
+
// Get the symbols
getSymbols(M, symbols);
-
+
// Done with the module.
delete M;
return true;
@@ -239,16 +245,16 @@ llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
std::vector<std::string>& symbols,
std::string* ErrMsg) {
// Get the module.
- std::auto_ptr<MemoryBuffer> Buffer(
+ OwningPtr<MemoryBuffer> Buffer(
MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str()));
-
+
Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
if (!M)
return 0;
-
+
// Get the symbols
getSymbols(M, symbols);
-
+
// Done with the module. Note that it's the caller's responsibility to delete
// the Module.
return M;
diff --git a/contrib/llvm/lib/Archive/ArchiveInternals.h b/contrib/llvm/lib/Archive/ArchiveInternals.h
index 08f20e7..55684f7 100644
--- a/contrib/llvm/lib/Archive/ArchiveInternals.h
+++ b/contrib/llvm/lib/Archive/ArchiveInternals.h
@@ -15,7 +15,7 @@
#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
#include "llvm/Bitcode/Archive.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/TimeValue.h"
#include "llvm/ADT/StringExtras.h"
#include <cstring>
diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
index 7eeeb59..c5ad5fc 100644
--- a/contrib/llvm/lib/Archive/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Archive/ArchiveWriter.cpp
@@ -15,9 +15,12 @@
#include "llvm/Module.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/system_error.h"
#include <fstream>
#include <ostream>
#include <iomanip>
@@ -25,7 +28,7 @@ using namespace llvm;
// Write an integer using variable bit rate encoding. This saves a few bytes
// per entry in the symbol table.
-static inline void writeInteger(unsigned num, std::ofstream& ARFile) {
+static inline void writeInteger(unsigned num, raw_ostream& ARFile) {
while (1) {
if (num < 0x80) { // done?
ARFile << (unsigned char)num;
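A worked example, assuming the usual 7-bits-per-byte continuation scheme that the elided else branch implements: the value 300 (binary 100101100) is written as 0xAC (the low seven bits 0x2C with the 0x80 continuation bit set) followed by 0x02 (the remaining bits), so small symbol-table entries cost one or two bytes instead of four.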
@@ -153,9 +156,10 @@ Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr,
// Insert a file into the archive before some other member. This also takes care
// of extracting the necessary flags and information from the file.
bool
-Archive::addFileBefore(const sys::Path& filePath, iterator where,
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
std::string* ErrMsg) {
- if (!filePath.exists()) {
+ bool Exists;
+ if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
if (ErrMsg)
*ErrMsg = "Can not add a non-existent file to archive";
return true;
@@ -178,9 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
flags |= ArchiveMember::HasPathFlag;
if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
- std::string magic;
- mbr->path.getMagicNumber(magic,4);
- switch (sys::IdentifyFileType(magic.c_str(),4)) {
+
+ sys::LLVMFileType type;
+ if (sys::fs::identify_magic(mbr->path.str(), type))
+ type = sys::Unknown_FileType;
+ switch (type) {
case sys::Bitcode_FileType:
flags |= ArchiveMember::BitcodeFlag;
break;
@@ -196,14 +202,14 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
bool
Archive::writeMember(
const ArchiveMember& member,
- std::ofstream& ARFile,
+ raw_ostream& ARFile,
bool CreateSymbolTable,
bool TruncateNames,
bool ShouldCompress,
std::string* ErrMsg
) {
- unsigned filepos = ARFile.tellp();
+ unsigned filepos = ARFile.tell();
filepos -= 8;
// Get the data and its size either from the
@@ -212,9 +218,13 @@ Archive::writeMember(
const char *data = (const char*)member.getData();
MemoryBuffer *mFile = 0;
if (!data) {
- mFile = MemoryBuffer::getFile(member.getPath().c_str(), ErrMsg);
- if (mFile == 0)
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
return true;
+ }
+ mFile = File.take();
data = mFile->getBufferStart();
fSize = mFile->getBufferSize();
}
@@ -225,7 +235,7 @@ Archive::writeMember(
std::vector<std::string> symbols;
std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+ ")";
- Module* M =
+ Module* M =
GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
// If the bitcode parsed successfully
@@ -272,7 +282,7 @@ Archive::writeMember(
ARFile.write(data,fSize);
// Make sure the member is an even length
- if ((ARFile.tellp() & 1) == 1)
+ if ((ARFile.tell() & 1) == 1)
ARFile << ARFILE_PAD;
// Close the mapped file if it was opened
@@ -282,7 +292,7 @@ Archive::writeMember(
// Write out the LLVM symbol table as an archive member to the file.
void
-Archive::writeSymbolTable(std::ofstream& ARFile) {
+Archive::writeSymbolTable(raw_ostream& ARFile) {
// Construct the symbol table's header
ArchiveMemberHeader Hdr;
@@ -306,7 +316,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) {
#ifndef NDEBUG
// Save the starting position of the symbol tables data content.
- unsigned startpos = ARFile.tellp();
+ unsigned startpos = ARFile.tell();
#endif
// Write out the symbols sequentially
@@ -323,7 +333,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) {
#ifndef NDEBUG
// Now that we're done with the symbol table, get the ending file position
- unsigned endpos = ARFile.tellp();
+ unsigned endpos = ARFile.tell();
#endif
// Make sure that the amount we wrote is what we pre-computed. This is
@@ -352,25 +362,20 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
}
// Create a temporary file to store the archive in
- sys::Path TmpArchive = archPath;
- if (TmpArchive.createTemporaryFileOnDisk(ErrMsg))
+ SmallString<128> TempArchivePath;
+ int ArchFD;
+ if (error_code ec =
+ sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
+ ArchFD, TempArchivePath)) {
+ if (ErrMsg) *ErrMsg = ec.message();
return true;
+ }
// Make sure the temporary gets removed if we crash
- sys::RemoveFileOnSignal(TmpArchive);
+ sys::RemoveFileOnSignal(sys::Path(TempArchivePath.str()));
// Create archive file for output.
- std::ios::openmode io_mode = std::ios::out | std::ios::trunc |
- std::ios::binary;
- std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode);
-
- // Check for errors opening or creating archive file.
- if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
- TmpArchive.eraseFromDisk();
- if (ErrMsg)
- *ErrMsg = "Error opening archive file: " + archPath.str();
- return true;
- }
+ raw_fd_ostream ArchiveFile(ArchFD, true);
// If we're creating a symbol table, reset it now
if (CreateSymbolTable) {
@@ -386,8 +391,9 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
if (writeMember(*I, ArchiveFile, CreateSymbolTable,
TruncateNames, Compress, ErrMsg)) {
- TmpArchive.eraseFromDisk();
ArchiveFile.close();
+ bool existed;
+ sys::fs::remove(TempArchivePath.str(), existed);
return true;
}
}
@@ -402,27 +408,29 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
// ensure compatibility with other archivers we need to put the symbol
// table first in the file. Unfortunately, this means mapping the file
// we just wrote back in and copying it to the destination file.
- sys::Path FinalFilePath = archPath;
+ SmallString<128> TempArchiveWithSymbolTablePath;
// Map in the archive we just wrote.
{
- OwningPtr<MemoryBuffer> arch(MemoryBuffer::getFile(TmpArchive.c_str()));
- if (arch == 0) return true;
+ OwningPtr<MemoryBuffer> arch;
+ if (error_code ec = MemoryBuffer::getFile(TempArchivePath.c_str(), arch)) {
+ if (ErrMsg)
+ *ErrMsg = ec.message();
+ return true;
+ }
const char* base = arch->getBufferStart();
- // Open another temporary file in order to avoid invalidating the
+ // Open another temporary file in order to avoid invalidating the
// mmapped data
- if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg))
- return true;
- sys::RemoveFileOnSignal(FinalFilePath);
-
- std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
- if (!FinalFile.is_open() || FinalFile.bad()) {
- TmpArchive.eraseFromDisk();
- if (ErrMsg)
- *ErrMsg = "Error opening archive file: " + FinalFilePath.str();
+ if (error_code ec =
+ sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()),
+ ArchFD, TempArchiveWithSymbolTablePath)) {
+ if (ErrMsg) *ErrMsg = ec.message();
return true;
}
+ sys::RemoveFileOnSignal(sys::Path(TempArchiveWithSymbolTablePath.str()));
+
+ raw_fd_ostream FinalFile(ArchFD, true);
// Write the file magic number
FinalFile << ARFILE_MAGIC;
@@ -435,7 +443,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
if (foreignST) {
if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
FinalFile.close();
- TmpArchive.eraseFromDisk();
+ bool existed;
+ sys::fs::remove(TempArchiveWithSymbolTablePath.str(), existed);
return true;
}
}
@@ -451,19 +460,25 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
// Close up shop
FinalFile.close();
} // free arch.
-
+
// Move the final file over top of TmpArchive
- if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg))
+ if (error_code ec = sys::fs::rename(TempArchiveWithSymbolTablePath.str(),
+ TempArchivePath.str())) {
+ if (ErrMsg) *ErrMsg = ec.message();
return true;
+ }
}
-
+
// Before we replace the actual archive, we need to forget all the
// members, since they point to data in that old archive. We need to do
// this because we cannot replace an open file on Windows.
cleanUpMemory();
-
- if (TmpArchive.renamePathOnDisk(archPath, ErrMsg))
+
+ if (error_code ec = sys::fs::rename(TempArchivePath.str(),
+ archPath.str())) {
+ if (ErrMsg) *ErrMsg = ec.message();
return true;
+ }
// Set correct read and write permissions after temporary file is moved
// to final destination path.
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index 032753a..857fa1e 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -15,18 +15,20 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instruction.h"
#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Assembly/Parser.h"
+#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace llvm;
-bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
+bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
return true;
}
@@ -507,6 +509,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(default);
KEYWORD(hidden);
KEYWORD(protected);
+ KEYWORD(unnamed_addr);
KEYWORD(extern_weak);
KEYWORD(external);
KEYWORD(thread_local);
@@ -544,6 +547,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
KEYWORD(msp430_intrcc);
+ KEYWORD(ptx_kernel);
+ KEYWORD(ptx_device);
KEYWORD(cc);
KEYWORD(c);
@@ -570,6 +575,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noredzone);
KEYWORD(noimplicitfloat);
KEYWORD(naked);
+ KEYWORD(hotpatch);
KEYWORD(type);
KEYWORD(opaque);
@@ -595,6 +601,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
TYPEKEYWORD("label", Type::getLabelTy(Context));
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
+ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
#undef TYPEKEYWORD
// Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
@@ -677,7 +684,7 @@ lltok::Kind LLLexer::LexIdentifier() {
APInt Tmp(bits, StringRef(TokStart+3, len), 16);
uint32_t activeBits = Tmp.getActiveBits();
if (activeBits > 0 && activeBits < bits)
- Tmp.trunc(activeBits);
+ Tmp = Tmp.trunc(activeBits);
APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
return lltok::APSInt;
}
@@ -804,12 +811,12 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (TokStart[0] == '-') {
uint32_t minBits = Tmp.getMinSignedBits();
if (minBits > 0 && minBits < numBits)
- Tmp.trunc(minBits);
+ Tmp = Tmp.trunc(minBits);
APSIntVal = APSInt(Tmp, false);
} else {
uint32_t activeBits = Tmp.getActiveBits();
if (activeBits > 0 && activeBits < numBits)
- Tmp.trunc(activeBits);
+ Tmp = Tmp.trunc(activeBits);
APSIntVal = APSInt(Tmp, true);
}
return lltok::APSInt;
@@ -828,7 +835,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
}
}
- APFloatVal = APFloat(atof(TokStart));
+ APFloatVal = APFloat(std::atof(TokStart));
return lltok::APFloat;
}
@@ -862,6 +869,6 @@ lltok::Kind LLLexer::LexPositive() {
}
}
- APFloatVal = APFloat(atof(TokStart));
+ APFloatVal = APFloat(std::atof(TokStart));
return lltok::APFloat;
}
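
The Tmp = Tmp.trunc(...) edits above track an APInt API change: around this revision trunc/zext/sext stopped resizing the value in place and began returning the resized APInt, so the result must be assigned. A small illustration of the difference:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    void truncExample() {
      APInt Wide(64, 255);               // 64-bit value 0xFF
      APInt Narrow = Wide.trunc(8);      // new 8-bit APInt; Wide is unchanged
      (void)Narrow;
      // Pre-change code relied on the mutating form:
      //   Wide.trunc(8);                // with the new API this silently discards the result
    }
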
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.h b/contrib/llvm/lib/AsmParser/LLLexer.h
index 70f1cfd..09ae801 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.h
+++ b/contrib/llvm/lib/AsmParser/LLLexer.h
@@ -62,8 +62,8 @@ namespace llvm {
const APFloat &getAPFloatVal() const { return APFloatVal; }
- bool Error(LocTy L, const std::string &Msg) const;
- bool Error(const std::string &Msg) const { return Error(getLoc(), Msg); }
+ bool Error(LocTy L, const Twine &Msg) const;
+ bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
std::string getFilename() const;
private:
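
Error() and TokError() now take a Twine rather than a std::string, which is what lets the utostr() calls later in this patch become Twine(N) concatenations: the message is assembled lazily and only materialized when the diagnostic is emitted. The call-site pattern, reduced to a stand-alone sketch (reportError is a hypothetical stand-in for the lexer's Error):

    #include "llvm/ADT/Twine.h"
    #include <string>
    using namespace llvm;

    static bool reportError(const Twine &Msg) {
      std::string Text = Msg.str();      // single allocation, at the point of use
      // ... hand Text to SourceMgr / stderr ...
      return true;
    }

    static bool checkSlot(unsigned SlotNo) {
      // Builds a Twine expression tree; no intermediate std::string temporaries.
      return reportError("use of undefined metadata '!" + Twine(SlotNo) + "'");
    }
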
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index f21a065..cdfacbe 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -22,7 +22,6 @@
#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -52,7 +51,7 @@ bool LLParser::ValidateEndOfModule() {
if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
return Error(MDList[i].Loc, "use of undefined metadata '!" +
- utostr(SlotNo) + "'");
+ Twine(SlotNo) + "'");
Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
}
}
@@ -109,7 +108,7 @@ bool LLParser::ValidateEndOfModule() {
if (!ForwardRefTypeIDs.empty())
return Error(ForwardRefTypeIDs.begin()->second.second,
"use of undefined type '%" +
- utostr(ForwardRefTypeIDs.begin()->first) + "'");
+ Twine(ForwardRefTypeIDs.begin()->first) + "'");
if (!ForwardRefVals.empty())
return Error(ForwardRefVals.begin()->second.second,
@@ -119,12 +118,12 @@ bool LLParser::ValidateEndOfModule() {
if (!ForwardRefValIDs.empty())
return Error(ForwardRefValIDs.begin()->second.second,
"use of undefined value '@" +
- utostr(ForwardRefValIDs.begin()->first) + "'");
+ Twine(ForwardRefValIDs.begin()->first) + "'");
if (!ForwardRefMDNodes.empty())
return Error(ForwardRefMDNodes.begin()->second.second,
"use of undefined metadata '!" +
- utostr(ForwardRefMDNodes.begin()->first) + "'");
+ Twine(ForwardRefMDNodes.begin()->first) + "'");
// Look for intrinsic functions and CallInst that need to be upgraded
@@ -195,7 +194,8 @@ bool LLParser::ParseTopLevelEntities() {
// The Global variable production with no name can have many different
// optional leading prefixes, the production is:
// GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
- // OptionalAddrSpace ('constant'|'global') ...
+ // OptionalAddrSpace OptionalUnnamedAddr
+ // ('constant'|'global') ...
case lltok::kw_private: // OptionalLinkage
case lltok::kw_linker_private: // OptionalLinkage
case lltok::kw_linker_private_weak: // OptionalLinkage
@@ -317,7 +317,7 @@ bool LLParser::ParseUnnamedType() {
if (Lex.getKind() == lltok::LocalVarID) {
if (Lex.getUIntVal() != TypeID)
return Error(Lex.getLoc(), "type expected to be numbered '%" +
- utostr(TypeID) + "'");
+ Twine(TypeID) + "'");
Lex.Lex(); // eat LocalVarID;
if (ParseToken(lltok::equal, "expected '=' after name"))
@@ -444,7 +444,7 @@ bool LLParser::ParseUnnamedGlobal() {
if (Lex.getKind() == lltok::GlobalID) {
if (Lex.getUIntVal() != VarID)
return Error(Lex.getLoc(), "variable expected to be numbered '%" +
- utostr(VarID) + "'");
+ Twine(VarID) + "'");
Lex.Lex(); // eat GlobalID;
if (ParseToken(lltok::equal, "expected '=' after name"))
@@ -676,16 +676,16 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
// Insert into the module, we know its name won't collide now.
M->getAliasList().push_back(GA);
- assert(GA->getNameStr() == Name && "Should not be a name conflict!");
+ assert(GA->getName() == Name && "Should not be a name conflict!");
return false;
}
/// ParseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
-/// OptionalAddrSpace GlobalType Type Const
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
/// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
-/// OptionalAddrSpace GlobalType Type Const
+/// OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
///
/// Everything through visibility has been parsed already.
///
@@ -693,12 +693,15 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
unsigned Linkage, bool HasLinkage,
unsigned Visibility) {
unsigned AddrSpace;
- bool ThreadLocal, IsConstant;
+ bool ThreadLocal, IsConstant, UnnamedAddr;
+ LocTy UnnamedAddrLoc;
LocTy TyLoc;
PATypeHolder Ty(Type::getVoidTy(Context));
if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
ParseOptionalAddrSpace(AddrSpace) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
ParseGlobalType(IsConstant) ||
ParseType(Ty, TyLoc))
return true;
@@ -756,6 +759,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GV->setThreadLocal(ThreadLocal);
+ GV->setUnnamedAddr(UnnamedAddr);
// Parse attributes on the global.
while (Lex.getKind() == lltok::comma) {
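
With the keyword in the lexer and the ParseGlobal()/setUnnamedAddr() plumbing above, textual IR can now mark a global as unnamed_addr. A quick, hypothetical round-trip check, assuming the 2.9-era ParseAssemblyString signature and the new GlobalValue::hasUnnamedAddr() accessor:

    #include "llvm/Assembly/Parser.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/SourceMgr.h"
    #include <cassert>
    using namespace llvm;

    void roundTripUnnamedAddr() {
      LLVMContext Ctx;
      SMDiagnostic Err;
      Module *M = ParseAssemblyString(
          "@msg = private unnamed_addr constant [4 x i8] c\"abc\\00\"\n",
          0, Err, Ctx);
      assert(M && "parse failed");
      assert(M->getNamedGlobal("msg")->hasUnnamedAddr());
      delete M;
    }
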
@@ -855,7 +859,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val) {
if (Val->getType() == Ty) return Val;
- Error(Loc, "'@" + utostr(ID) + "' defined with type '" +
+ Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
Val->getType()->getDescription() + "'");
return 0;
}
@@ -983,6 +987,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break;
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
+ case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
case lltok::kw_alignstack: {
unsigned Alignment;
@@ -1084,6 +1089,8 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
/// ::= 'msp430_intrcc'
+/// ::= 'ptx_kernel'
+/// ::= 'ptx_device'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
@@ -1099,6 +1106,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
+ case lltok::kw_ptx_kernel: CC = CallingConv::PTX_Kernel; break;
+ case lltok::kw_ptx_device: CC = CallingConv::PTX_Device; break;
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
@@ -1128,7 +1137,6 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
Lex.Lex();
MDNode *Node;
- unsigned NodeID;
SMLoc Loc = Lex.getLoc();
if (ParseToken(lltok::exclaim, "expected '!' here"))
@@ -1145,6 +1153,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
assert(ID.Kind == ValID::t_MDNode);
Inst->setMetadata(MDK, ID.MDNodeVal);
} else {
+ unsigned NodeID = 0;
if (ParseMDNodeID(Node, NodeID))
return true;
if (Node) {
@@ -1196,8 +1205,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
if (Lex.getKind() != lltok::kw_align)
return Error(Lex.getLoc(), "expected metadata or 'align'");
-
- LocTy AlignLoc = Lex.getLoc();
+
if (ParseOptionalAlignment(Alignment)) return true;
}
@@ -1245,7 +1253,7 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
AteExtraComma = true;
return false;
}
- unsigned Idx;
+ unsigned Idx = 0;
if (ParseUInt32(Idx)) return true;
Indices.push_back(Idx);
}
@@ -1778,7 +1786,7 @@ bool LLParser::PerFunctionState::FinishFunction() {
if (!ForwardRefValIDs.empty())
return P.Error(ForwardRefValIDs.begin()->second.second,
"use of undefined value '%" +
- utostr(ForwardRefValIDs.begin()->first) + "'");
+ Twine(ForwardRefValIDs.begin()->first) + "'");
return false;
}
@@ -1846,9 +1854,9 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
if (Val) {
if (Val->getType() == Ty) return Val;
if (Ty->isLabelTy())
- P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block");
+ P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
else
- P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" +
+ P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
Val->getType()->getDescription() + "'");
return 0;
}
@@ -1890,7 +1898,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
if (unsigned(NameID) != NumberedVals.size())
return P.Error(NameLoc, "instruction expected to be numbered '%" +
- utostr(NumberedVals.size()) + "'");
+ Twine(NumberedVals.size()) + "'");
std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
ForwardRefValIDs.find(NameID);
@@ -1922,7 +1930,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
// Set the name on the instruction.
Inst->setName(NameStr);
- if (Inst->getNameStr() != NameStr)
+ if (Inst->getName() != NameStr)
return P.Error(NameLoc, "multiple definition of local value named '" +
NameStr + "'");
return false;
@@ -2068,10 +2076,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
if (Elts[i]->getType() != Elts[0]->getType())
return Error(FirstEltLoc,
- "vector element #" + utostr(i) +
+ "vector element #" + Twine(i) +
" is not of type '" + Elts[0]->getType()->getDescription());
- ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size());
+ ID.ConstantVal = ConstantVector::get(Elts);
ID.Kind = ValID::t_Constant;
return false;
}
@@ -2101,7 +2109,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
if (Elts[i]->getType() != Elts[0]->getType())
return Error(FirstEltLoc,
- "array element #" + utostr(i) +
+ "array element #" + Twine(i) +
" is not of type '" +Elts[0]->getType()->getDescription());
}
@@ -2278,7 +2286,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case lltok::kw_fdiv:
case lltok::kw_urem:
case lltok::kw_srem:
- case lltok::kw_frem: {
+ case lltok::kw_frem:
+ case lltok::kw_shl:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
bool NUW = false;
bool NSW = false;
bool Exact = false;
@@ -2286,9 +2297,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Constant *Val0, *Val1;
Lex.Lex();
LocTy ModifierLoc = Lex.getLoc();
- if (Opc == Instruction::Add ||
- Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+ Opc == Instruction::Mul || Opc == Instruction::Shl) {
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
if (EatIfPresent(lltok::kw_nsw)) {
@@ -2296,7 +2306,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (EatIfPresent(lltok::kw_nuw))
NUW = true;
}
- } else if (Opc == Instruction::SDiv) {
+ } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr || Opc == Instruction::AShr) {
if (EatIfPresent(lltok::kw_exact))
Exact = true;
}
@@ -2323,6 +2334,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
if (!Val0->getType()->isIntOrIntVectorTy())
return Error(ID.Loc, "constexpr requires integer operands");
break;
@@ -2339,7 +2353,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Flags = 0;
if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap;
- if (Exact) Flags |= SDivOperator::IsExact;
+ if (Exact) Flags |= PossiblyExactOperator::IsExact;
Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
ID.ConstantVal = C;
ID.Kind = ValID::t_Constant;
@@ -2347,9 +2361,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
// Logical Operations
- case lltok::kw_shl:
- case lltok::kw_lshr:
- case lltok::kw_ashr:
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: {
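
Moving shl/lshr/ashr out of the "logical" group lets the constant-expression parser accept the same nuw/nsw/exact modifiers as the arithmetic opcodes and forward them to ConstantExpr::get(). What the parser effectively builds for "lshr exact", as a stand-alone sketch using the flag-taking overload seen in the hunk:

    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Operator.h"
    using namespace llvm;

    Constant *exactLShrExpr(LLVMContext &Ctx) {
      Constant *Val0 = ConstantInt::get(Type::getInt32Ty(Ctx), 64);
      Constant *Val1 = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
      unsigned Flags = PossiblyExactOperator::IsExact;   // from the 'exact' keyword
      return ConstantExpr::get(Instruction::LShr, Val0, Val1, Flags);
    }
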
@@ -2572,7 +2583,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
return Error(ID.Loc, "integer constant must have integer type");
- ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+ ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
V = ConstantInt::get(Context, ID.APSIntVal);
return false;
case ValID::t_APFloat:
@@ -2654,7 +2665,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
-/// Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
/// OptionalAlign OptGC
bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Parse the linkage.
@@ -2714,7 +2725,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (NameID != NumberedVals.size())
return TokError("function expected to be numbered '%" +
- utostr(NumberedVals.size()) + "'");
+ Twine(NumberedVals.size()) + "'");
} else {
return TokError("expected function name");
}
@@ -2730,8 +2741,12 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
std::string Section;
unsigned Alignment;
std::string GC;
+ bool UnnamedAddr;
+ LocTy UnnamedAddrLoc;
if (ParseArgumentList(ArgList, isVarArg, false) ||
+ ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+ &UnnamedAddrLoc) ||
ParseOptionalAttrs(FuncAttrs, 2) ||
(EatIfPresent(lltok::kw_section) &&
ParseStringConstant(Section)) ||
@@ -2821,7 +2836,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn = cast<Function>(I->second.first);
if (Fn->getType() != PFT)
return Error(NameLoc, "type of definition and forward reference of '@" +
- utostr(NumberedVals.size()) +"' disagree");
+ Twine(NumberedVals.size()) + "' disagree");
ForwardRefValIDs.erase(I);
}
}
@@ -2838,6 +2853,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
Fn->setCallingConv(CC);
Fn->setAttributes(PAL);
+ Fn->setUnnamedAddr(UnnamedAddr);
Fn->setAlignment(Alignment);
Fn->setSection(Section);
if (!GC.empty()) Fn->setGC(GC.c_str());
@@ -2855,7 +2871,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Set the name, if it conflicted, it will be auto-renamed.
ArgIt->setName(ArgList[i].Name);
- if (ArgIt->getNameStr() != ArgList[i].Name)
+ if (ArgIt->getName() != ArgList[i].Name)
return Error(ArgList[i].Loc, "redefinition of argument '%" +
ArgList[i].Name + "'");
}
@@ -2989,55 +3005,38 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
// Binary Operators.
case lltok::kw_add:
case lltok::kw_sub:
- case lltok::kw_mul: {
- bool NUW = false;
- bool NSW = false;
+ case lltok::kw_mul:
+ case lltok::kw_shl: {
LocTy ModifierLoc = Lex.getLoc();
- if (EatIfPresent(lltok::kw_nuw))
- NUW = true;
- if (EatIfPresent(lltok::kw_nsw)) {
- NSW = true;
- if (EatIfPresent(lltok::kw_nuw))
- NUW = true;
- }
- bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
- if (!Result) {
- if (!Inst->getType()->isIntOrIntVectorTy()) {
- if (NUW)
- return Error(ModifierLoc, "nuw only applies to integer operations");
- if (NSW)
- return Error(ModifierLoc, "nsw only applies to integer operations");
- }
- if (NUW)
- cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
- if (NSW)
- cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
- }
- return Result;
+ bool NUW = EatIfPresent(lltok::kw_nuw);
+ bool NSW = EatIfPresent(lltok::kw_nsw);
+ if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+ if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+ if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+ return false;
}
case lltok::kw_fadd:
case lltok::kw_fsub:
case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
- case lltok::kw_sdiv: {
- bool Exact = false;
- if (EatIfPresent(lltok::kw_exact))
- Exact = true;
- bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
- if (!Result)
- if (Exact)
- cast<BinaryOperator>(Inst)->setIsExact(true);
- return Result;
+ case lltok::kw_sdiv:
+ case lltok::kw_udiv:
+ case lltok::kw_lshr:
+ case lltok::kw_ashr: {
+ bool Exact = EatIfPresent(lltok::kw_exact);
+
+ if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+ return false;
}
- case lltok::kw_udiv:
case lltok::kw_urem:
case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
case lltok::kw_fdiv:
case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2);
- case lltok::kw_shl:
- case lltok::kw_lshr:
- case lltok::kw_ashr:
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
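
At the instruction level the same regrouping sends udiv/lshr/ashr through the 'exact'-aware path, so the parsed instruction carries the flag on the BinaryOperator itself. Roughly equivalent construction by hand (a sketch, not the parser's actual code path; the operands are assumed to be integer-typed Values):

    #include "llvm/Instructions.h"
    using namespace llvm;

    BinaryOperator *makeExactLShr(Value *Op0, Value *Op1) {
      BinaryOperator *I = BinaryOperator::Create(Instruction::LShr, Op0, Op1, "sh");
      I->setIsExact(true);     // what 'lshr exact' in the IR text turns into
      return I;
    }
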
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 404cec3..93e7f77 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -142,10 +142,10 @@ namespace llvm {
private:
- bool Error(LocTy L, const std::string &Msg) const {
+ bool Error(LocTy L, const Twine &Msg) const {
return Lex.Error(L, Msg);
}
- bool TokError(const std::string &Msg) const {
+ bool TokError(const Twine &Msg) const {
return Error(Lex.getLoc(), Msg);
}
@@ -162,10 +162,12 @@ namespace llvm {
Lex.Lex();
return true;
}
- bool ParseOptionalToken(lltok::Kind T, bool &Present) {
+ bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
if (Lex.getKind() != T) {
Present = false;
} else {
+ if (Loc)
+ *Loc = Lex.getLoc();
Lex.Lex();
Present = true;
}
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 61f93a4..576da19 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -42,6 +42,7 @@ namespace lltok {
kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
+ kw_unnamed_addr,
kw_extern_weak,
kw_external, kw_thread_local,
kw_zeroinitializer,
@@ -72,6 +73,7 @@ namespace lltok {
kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
+ kw_ptx_kernel, kw_ptx_device,
kw_signext,
kw_zeroext,
@@ -95,6 +97,7 @@ namespace lltok {
kw_noredzone,
kw_noimplicitfloat,
kw_naked,
+ kw_hotpatch,
kw_type,
kw_opaque,
diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp
index e7cef9b..59fb471 100644
--- a/contrib/llvm/lib/AsmParser/Parser.cpp
+++ b/contrib/llvm/lib/AsmParser/Parser.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
#include <cstring>
using namespace llvm;
@@ -41,15 +42,14 @@ Module *llvm::ParseAssembly(MemoryBuffer *F,
Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
- std::string ErrorStr;
- MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
- if (F == 0) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
Err = SMDiagnostic(Filename,
- "Could not open input file: " + ErrorStr);
+ "Could not open input file: " + ec.message());
return 0;
}
- return ParseAssembly(F, 0, Err, Context);
+ return ParseAssembly(File.take(), 0, Err, Context);
}
Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
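
ParseAssemblyFile now goes through the error_code-returning MemoryBuffer factory and an OwningPtr, releasing ownership with take() only once the buffer is known to be good. The same idiom, reduced to its essentials (openInput is a hypothetical helper):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/system_error.h"
    #include <string>
    using namespace llvm;

    MemoryBuffer *openInput(const std::string &Filename, std::string &ErrStr) {
      OwningPtr<MemoryBuffer> File;
      if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
        ErrStr = ec.message();
        return 0;              // failure; OwningPtr holds nothing to clean up
      }
      return File.take();      // caller now owns the buffer
    }
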
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 830c79a..dbf8da0 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -136,7 +136,6 @@ namespace {
/// @brief A class for maintaining the slot number definition
/// as a placeholder for the actual definition for forward constants defs.
class ConstantPlaceHolder : public ConstantExpr {
- ConstantPlaceHolder(); // DO NOT IMPLEMENT
void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -149,7 +148,7 @@ namespace {
}
/// @brief Methods to support type inquiry through isa, cast, and dyn_cast.
- static inline bool classof(const ConstantPlaceHolder *) { return true; }
+ //static inline bool classof(const ConstantPlaceHolder *) { return true; }
static bool classof(const Value *V) {
return isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
@@ -163,7 +162,8 @@ namespace {
// FIXME: can we inherit this from ConstantExpr?
template <>
-struct OperandTraits<ConstantPlaceHolder> : public FixedNumOperandTraits<1> {
+struct OperandTraits<ConstantPlaceHolder> :
+ public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
};
}
@@ -298,7 +298,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
UserCS->getType()->isPacked());
} else if (isa<ConstantVector>(UserC)) {
- NewC = ConstantVector::get(&NewOps[0], NewOps.size());
+ NewC = ConstantVector::get(NewOps);
} else {
assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0],
@@ -550,6 +550,9 @@ bool BitcodeReader::ParseTypeTable() {
case bitc::TYPE_CODE_METADATA: // METADATA
ResultTy = Type::getMetadataTy(Context);
break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
if (Record.size() < 1)
return Error("Invalid Integer type record");
@@ -794,7 +797,7 @@ bool BitcodeReader::ParseMetadata() {
if (NextBitCode == bitc::METADATA_NAMED_NODE) {
LLVM2_7MetadataDetected = true;
} else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
- assert ( 0 && "Inavlid Named Metadata record");
+ assert ( 0 && "Invalid Named Metadata record");
// Read named metadata elements.
unsigned Size = Record.size();
@@ -832,7 +835,8 @@ bool BitcodeReader::ParseMetadata() {
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
- const Type *Ty = getTypeByID(Record[i], false);
+ const Type *Ty = getTypeByID(Record[i]);
+ if (!Ty) return Error("Invalid METADATA_NODE2 record");
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
else if (!Ty->isVoidTy())
@@ -1081,13 +1085,17 @@ bool BitcodeReader::ParseConstants() {
if (Record.size() >= 4) {
if (Opc == Instruction::Add ||
Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
Flags |= OverflowingBinaryOperator::NoSignedWrap;
if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
- } else if (Opc == Instruction::SDiv) {
- if (Record[3] & (1 << bitc::SDIV_EXACT))
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[3] & (1 << bitc::PEO_EXACT))
Flags |= SDivOperator::IsExact;
}
}
@@ -1167,7 +1175,8 @@ bool BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval]
const VectorType *RTy = dyn_cast<VectorType>(CurTy);
- const VectorType *OpTy = dyn_cast<VectorType>(getTypeByID(Record[0]));
+ const VectorType *OpTy =
+ dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
if (Record.size() < 4 || RTy == 0 || OpTy == 0)
return Error("Invalid CE_SHUFVEC_EX record");
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
@@ -1418,11 +1427,13 @@ bool BitcodeReader::ParseModule() {
break;
}
// GLOBALVAR: [pointer type, isconst, initid,
- // linkage, alignment, section, visibility, threadlocal]
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
case bitc::MODULE_CODE_GLOBALVAR: {
if (Record.size() < 6)
return Error("Invalid MODULE_CODE_GLOBALVAR record");
const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record");
if (!Ty->isPointerTy())
return Error("Global not a pointer type!");
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
@@ -1444,6 +1455,10 @@ bool BitcodeReader::ParseModule() {
if (Record.size() > 7)
isThreadLocal = Record[7];
+ bool UnnamedAddr = false;
+ if (Record.size() > 8)
+ UnnamedAddr = Record[8];
+
GlobalVariable *NewGV =
new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
isThreadLocal, AddressSpace);
@@ -1452,6 +1467,7 @@ bool BitcodeReader::ParseModule() {
NewGV->setSection(Section);
NewGV->setVisibility(Visibility);
NewGV->setThreadLocal(isThreadLocal);
+ NewGV->setUnnamedAddr(UnnamedAddr);
ValueList.push_back(NewGV);
@@ -1461,11 +1477,12 @@ bool BitcodeReader::ParseModule() {
break;
}
// FUNCTION: [type, callingconv, isproto, linkage, paramattr,
- // alignment, section, visibility, gc]
+ // alignment, section, visibility, gc, unnamed_addr]
case bitc::MODULE_CODE_FUNCTION: {
if (Record.size() < 8)
return Error("Invalid MODULE_CODE_FUNCTION record");
const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record");
if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
const FunctionType *FTy =
@@ -1493,6 +1510,10 @@ bool BitcodeReader::ParseModule() {
return Error("Invalid GC ID");
Func->setGC(GCTable[Record[8]-1].c_str());
}
+ bool UnnamedAddr = false;
+ if (Record.size() > 9)
+ UnnamedAddr = Record[9];
+ Func->setUnnamedAddr(UnnamedAddr);
ValueList.push_back(Func);
// If this is a function with a body, remember the prototype we are
@@ -1507,6 +1528,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 3)
return Error("Invalid MODULE_ALIAS record");
const Type *Ty = getTypeByID(Record[0]);
+ if (!Ty) return Error("Invalid MODULE_ALIAS record");
if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
@@ -1598,6 +1620,112 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
return false;
}
+bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records for this module.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of module block");
+
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ switch (Stream.ReadSubBlockID()) {
+ default: // Skip unknown content.
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a record.
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_VERSION: // VERSION: [version#]
+ if (Record.size() < 1)
+ return Error("Malformed MODULE_CODE_VERSION");
+ // Only version #0 is supported so far.
+ if (Record[0] != 0)
+ return Error("Unknown bitstream version!");
+ break;
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (ConvertToString(Record, 0, S))
+ return Error("Invalid MODULE_CODE_TRIPLE record");
+ Triple = S;
+ break;
+ }
+ }
+ Record.clear();
+ }
+
+ return Error("Premature end of bitstream");
+}
+
+bool BitcodeReader::ParseTriple(std::string &Triple) {
+ if (Buffer->getBufferSize() & 3)
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+
+ unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
+ return Error("Invalid bitcode wrapper header");
+
+ StreamFile.init(BufPtr, BufEnd);
+ Stream.init(StreamFile);
+
+ // Sniff for the signature.
+ if (Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return Error("Invalid bitcode signature");
+
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (!Stream.AtEndOfStream()) {
+ unsigned Code = Stream.ReadCode();
+
+ if (Code != bitc::ENTER_SUBBLOCK)
+ return Error("Invalid record at top-level");
+
+ unsigned BlockID = Stream.ReadSubBlockID();
+
+ // We only know the MODULE subblock ID.
+ switch (BlockID) {
+ case bitc::MODULE_BLOCK_ID:
+ if (ParseModuleTriple(Triple))
+ return true;
+ break;
+ default:
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ break;
+ }
+ }
+
+ return false;
+}
+
/// ParseMetadataAttachment - Parse metadata attachments.
bool BitcodeReader::ParseMetadataAttachment() {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
@@ -1776,13 +1904,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (OpNum < Record.size()) {
if (Opc == Instruction::Add ||
Opc == Instruction::Sub ||
- Opc == Instruction::Mul) {
+ Opc == Instruction::Mul ||
+ Opc == Instruction::Shl) {
if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP))
cast<BinaryOperator>(I)->setHasNoSignedWrap(true);
if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true);
- } else if (Opc == Instruction::SDiv) {
- if (Record[OpNum] & (1 << bitc::SDIV_EXACT))
+ } else if (Opc == Instruction::SDiv ||
+ Opc == Instruction::UDiv ||
+ Opc == Instruction::LShr ||
+ Opc == Instruction::AShr) {
+ if (Record[OpNum] & (1 << bitc::PEO_EXACT))
cast<BinaryOperator>(I)->setIsExact(true);
}
}
@@ -2535,7 +2667,24 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
// Read in the entire module, and destroy the BitcodeReader.
if (M->MaterializeAllPermanently(ErrMsg)) {
delete M;
- return NULL;
+ return 0;
}
+
return M;
}
+
+std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
+ LLVMContext& Context,
+ std::string *ErrMsg) {
+ BitcodeReader *R = new BitcodeReader(Buffer, Context);
+ // Don't let the BitcodeReader dtor delete 'Buffer'.
+ R->setBufferOwned(false);
+
+ std::string Triple("");
+ if (R->ParseTriple(Triple))
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+
+ delete R;
+ return Triple;
+}
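
getBitcodeTargetTriple() gives tools a way to read a module's target triple without materializing the module: ParseTriple() only sniffs the wrapper/signature and walks MODULE_BLOCK records until it finds MODULE_CODE_TRIPLE. A minimal caller, assuming the declaration this change adds to llvm/Bitcode/ReaderWriter.h:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <string>
    using namespace llvm;

    std::string sniffTriple(MemoryBuffer *Buf, LLVMContext &Ctx) {
      std::string ErrMsg;
      std::string Triple = getBitcodeTargetTriple(Buf, Ctx, &ErrMsg);
      // An empty result means no triple record was found (ErrMsg explains why).
      return Triple;
    }
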
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
index 053121b..f8fc079 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h
@@ -212,6 +212,10 @@ public:
/// @brief Main interface to parsing a bitcode buffer.
/// @returns true if an error occurred.
bool ParseBitcodeInto(Module *M);
+
+ /// @brief Cheap mechanism to just extract module triple
+ /// @returns true if an error occurred.
+ bool ParseTriple(std::string &Triple);
private:
const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
Value *getFnValueByID(unsigned ID, const Type *Ty) {
@@ -270,6 +274,7 @@ private:
bool ResolveGlobalAndAliasInits();
bool ParseMetadata();
bool ParseMetadataAttachment();
+ bool ParseModuleTriple(std::string &Triple);
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7b6fc6c..f8ef8c6 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -26,7 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Program.h"
+#include <cctype>
using namespace llvm;
/// These are manifest constants used by the bitcode writer. They do not need to
@@ -211,6 +212,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break;
case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
case Type::IntegerTyID:
// INTEGER: [width]
Code = bitc::TYPE_CODE_INTEGER;
@@ -402,7 +404,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
unsigned AbbrevToUse = 0;
// GLOBALVAR: [type, isconst, initid,
- // linkage, alignment, section, visibility, threadlocal]
+ // linkage, alignment, section, visibility, threadlocal,
+ // unnamed_addr]
Vals.push_back(VE.getTypeID(GV->getType()));
Vals.push_back(GV->isConstant());
Vals.push_back(GV->isDeclaration() ? 0 :
@@ -411,9 +414,11 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(Log2_32(GV->getAlignment())+1);
Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
if (GV->isThreadLocal() ||
- GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ GV->getVisibility() != GlobalValue::DefaultVisibility ||
+ GV->hasUnnamedAddr()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(GV->isThreadLocal());
+ Vals.push_back(GV->hasUnnamedAddr());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -425,7 +430,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
// FUNCTION: [type, callingconv, isproto, paramattr,
- // linkage, alignment, section, visibility, gc]
+ // linkage, alignment, section, visibility, gc, unnamed_addr]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -435,6 +440,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
Vals.push_back(getEncodedVisibility(F));
Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+ Vals.push_back(F->hasUnnamedAddr());
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -464,9 +470,10 @@ static uint64_t GetOptimizationFlags(const Value *V) {
Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
if (OBO->hasNoUnsignedWrap())
Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
- } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(V)) {
- if (Div->isExact())
- Flags |= 1 << bitc::SDIV_EXACT;
+ } else if (const PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(V)) {
+ if (PEO->isExact())
+ Flags |= 1 << bitc::PEO_EXACT;
}
return Flags;
@@ -1641,9 +1648,12 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
/// WriteBitcodeToStream - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
- // If this is darwin, emit a file header and trailer if needed.
- bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos;
- if (isDarwin)
+ // If this is darwin or another generic macho target, emit a file header and
+ // trailer if needed.
+ bool isMacho =
+ M->getTargetTriple().find("-darwin") != std::string::npos ||
+ M->getTargetTriple().find("-macho") != std::string::npos;
+ if (isMacho)
EmitDarwinBCHeader(Stream, M->getTargetTriple());
// Emit the file header.
@@ -1657,6 +1667,6 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
// Emit the module.
WriteModule(M, Stream);
- if (isDarwin)
+ if (isMacho)
EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
}
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5a634d6..b520d8f 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -155,16 +155,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -176,16 +171,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
@@ -197,12 +187,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
@@ -435,12 +421,9 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- DefIndices[Reg] = Count;
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
- }
}
}
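
The three hunks above fold the old "handle Reg, then repeat for getAliasSet(Reg)" pattern into a single loop over TRI->getOverlaps(Reg), which yields a zero-terminated list that includes the register itself. The loop shape in isolation (markLiveOut is a hypothetical helper, assuming that TargetRegisterInfo interface):

    #include "llvm/Target/TargetRegisterInfo.h"
    #include <vector>
    using namespace llvm;

    static void markLiveOut(const TargetRegisterInfo *TRI, unsigned Reg,
                            std::vector<unsigned> &KillIndices, unsigned BBSize) {
      // Visits Reg first, then each overlapping register; stops at the 0 terminator.
      for (const unsigned *Alias = TRI->getOverlaps(Reg);
           unsigned R = *Alias; ++Alias)
        KillIndices[R] = BBSize;
    }
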
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 0000000..20c7625
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,68 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const BitVector &ReservedRegs)
+ : Pos(0), Reserved(ReservedRegs) {
+ const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+ std::pair<unsigned, unsigned> HintPair =
+ VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+ // HintPair.second is a register, phys or virt.
+ Hint = HintPair.second;
+
+ // Translate to physreg, or 0 if not assigned yet.
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = VRM.getPhys(Hint);
+
+ // The remaining allocation order may depend on the hint.
+ tie(Begin, End) = VRM.getTargetRegInfo()
+ .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction());
+
+ // Target-dependent hints require resolution.
+ if (HintPair.first)
+ Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint,
+ VRM.getMachineFunction());
+
+ // The hint must be a valid physreg for allocation.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || ReservedRegs.test(Hint)))
+ Hint = 0;
+}
+
+unsigned AllocationOrder::next() {
+ // First take the hint.
+ if (!Pos) {
+ Pos = Begin;
+ if (Hint)
+ return Hint;
+ }
+ // Then look at the order from TRI.
+ while(Pos != End) {
+ unsigned Reg = *Pos++;
+ if (Reg != Hint && !Reserved.test(Reg))
+ return Reg;
+ }
+ return 0;
+}
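
A typical consumer walks the order once per live interval: the hint (if valid) comes first, reserved registers are skipped, and rewind() restarts the walk after an eviction. Roughly, as a hypothetical allocator helper built on the interface declared in the header that follows:

    #include "AllocationOrder.h"
    #include "VirtRegMap.h"
    #include "llvm/ADT/BitVector.h"
    using namespace llvm;

    // Return the first physreg in VirtReg's allocation order that Available accepts.
    static unsigned pickPhysReg(unsigned VirtReg, const VirtRegMap &VRM,
                                const BitVector &Reserved,
                                bool (*Available)(unsigned)) {
      AllocationOrder Order(VirtReg, VRM, Reserved);
      while (unsigned PhysReg = Order.next())
        if (Available(PhysReg))
          return PhysReg;
      return 0;    // nothing free; the caller may evict and then Order.rewind()
    }
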
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 0000000..3db4b69
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class BitVector;
+class VirtRegMap;
+
+class AllocationOrder {
+ const unsigned *Begin;
+ const unsigned *End;
+ const unsigned *Pos;
+ const BitVector &Reserved;
+ unsigned Hint;
+public:
+
+ /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param ReservedRegs Set of reserved registers as returned by
+ /// TargetRegisterInfo::getReservedRegs().
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const BitVector &ReservedRegs);
+
+ /// next - Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next again after it returned 0.
+ /// It will keep returning 0 until rewind() is called.
+ unsigned next();
+
+ /// rewind - Start over from the beginning.
+ void rewind() { Pos = 0; }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index e3dd646..36638c3 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -30,7 +31,7 @@ using namespace llvm;
/// of insertvalue or extractvalue indices that identify a member, return
/// the linearized index of the start of the member.
///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
const unsigned *Indices,
const unsigned *IndicesEnd,
unsigned CurIndex) {
@@ -45,8 +46,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
EE = STy->element_end();
EI != EE; ++EI) {
if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
}
return CurIndex;
}
@@ -55,8 +56,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
const Type *EltTy = ATy->getElementType();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
if (Indices && *Indices == i)
- return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
}
return CurIndex;
}
@@ -125,7 +126,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
/// processed uses a memory 'm' constraint.
bool
-llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
const TargetLowering &TLI) {
for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
InlineAsm::ConstraintInfo &CI = CInfos[i];
@@ -283,3 +284,20 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return true;
}
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
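
With the TargetLowering parameter gone, ComputeLinearIndex is a pure type walk: it numbers scalar leaves depth-first and returns the number of the leaf reached by the insertvalue/extractvalue index list. A worked call on a hypothetical aggregate, assuming the new signature above:

    #include "llvm/CodeGen/Analysis.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <vector>
    using namespace llvm;

    unsigned linearIndexExample(LLVMContext &Ctx) {
      // { i32, { float, float }, i8 } -- scalar leaves are numbered 0,1,2,3.
      std::vector<const Type*> InnerFields(2, Type::getFloatTy(Ctx));
      const Type *Inner = StructType::get(Ctx, InnerFields);
      std::vector<const Type*> OuterFields;
      OuterFields.push_back(Type::getInt32Ty(Ctx));
      OuterFields.push_back(Inner);
      OuterFields.push_back(Type::getInt8Ty(Ctx));
      const Type *Outer = StructType::get(Ctx, OuterFields);

      unsigned Indices[] = { 1, 1 };                    // inner struct, second field
      return ComputeLinearIndex(Outer, Indices, Indices + 2);   // == 2
    }
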
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d358ab2..43e8990 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -38,6 +38,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,16 +179,24 @@ bool AsmPrinter::doInitialization(Module &M) {
if (!M.getModuleInlineAsm().empty()) {
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
if (MAI->doesSupportDebugInformation())
DD = new DwarfDebug(this, &M);
-
+
if (MAI->doesSupportExceptionHandling())
- DE = new DwarfException(this);
+ switch (MAI->getExceptionHandlingType()) {
+ default:
+ case ExceptionHandling::DwarfTable:
+ DE = new DwarfTableException(this);
+ break;
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ break;
+ }
return false;
}
@@ -282,8 +291,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common symbols.
if (GVKind.isCommon()) {
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
@@ -301,11 +314,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
return;
}
+
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
// .local _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
@@ -327,6 +344,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle thread local data for mach-o which requires us to output an
// additional structure of data and mangle the original symbol so that we
// can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
@@ -623,7 +647,7 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- DD->beginScope(II);
+ DD->beginInstruction(II);
}
if (isVerbose())
@@ -657,7 +681,7 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- DD->endScope(II);
+ DD->endInstruction(II);
}
}
}
@@ -729,7 +753,20 @@ bool AsmPrinter::doFinalization(Module &M) {
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
EmitGlobalVariable(I);
-
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V);
+ }
+
// Finalize debug and EH information.
if (DE) {
{
@@ -905,14 +942,6 @@ void AsmPrinter::EmitConstantPool() {
const Type *Ty = CPE.getType();
Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
-
- // Emit the label with a comment on it.
- if (isVerbose()) {
- OutStreamer.GetCommentOS() << "constant pool ";
- WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
- MF->getFunction()->getParent());
- OutStreamer.GetCommentOS() << '\n';
- }
OutStreamer.EmitLabel(GetCPISymbol(CPI));
if (CPE.isMachineConstantPoolEntry())
@@ -983,7 +1012,7 @@ void AsmPrinter::EmitJumpTableInfo() {
}
}
- // On some targets (e.g. Darwin) we want to emit two consequtive labels
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
@@ -1004,6 +1033,7 @@ void AsmPrinter::EmitJumpTableInfo() {
void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ce4519c..98a1bf2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -36,9 +36,8 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
+ if (MAI->hasLEB128()) {
+ OutStreamer.EmitSLEB128IntValue(Value);
return;
}
@@ -60,10 +59,10 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
unsigned PadTo) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
-
- if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
+
+ // FIXME: Should we add a PadTo option to the streamer?
+ if (MAI->hasLEB128() && PadTo == 0) {
+ OutStreamer.EmitULEB128IntValue(Value);
return;
}
@@ -157,7 +156,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
const MCExpr *Exp =
TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
}
void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
@@ -215,8 +214,8 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
int stackGrowth = TM.getTargetData()->getPointerSize();
- if (TM.getFrameInfo()->getStackGrowthDirection() !=
- TargetFrameInfo::StackGrowsUp)
+ if (TM.getFrameLowering()->getStackGrowthDirection() !=
+ TargetFrameLowering::StackGrowsUp)
stackGrowth *= -1;
for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
@@ -277,3 +276,43 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
}
}
}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ int stackGrowth = TM.getTargetData()->getPointerSize();
+ if (TM.getFrameLowering()->getStackGrowthDirection() !=
+ TargetFrameLowering::StackGrowsUp)
+ stackGrowth *= -1;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ MCSymbol *Label = Move.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ assert(!Src.isReg() && "Machine move not supported yet.");
+
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+        assert(0 && "Machine move not supported yet");
+ // Reg + Offset
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+ }
+}
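
EmitCFIFrameMoves above classifies each MachineMove by which side refers to the virtual frame pointer and maps it to one of three CFI directives. Below is a toy sketch of that classification with invented stand-in types (Loc, Move) rather than MachineLocation/MachineMove; the example moves assume a conventional x86-64 prologue.

#include <cstdio>

struct Loc {
  bool IsReg;
  int Reg;                 // -1 stands in for MachineLocation::VirtualFP
  int Offset;
};
static const int VirtualFP = -1;

struct Move { Loc Dst, Src; };

static void emitCFIFor(const Move &M) {
  const Loc &Dst = M.Dst, &Src = M.Src;
  if (Dst.IsReg && Dst.Reg == VirtualFP) {
    // The CFA is being adjusted: either a new offset from the same base...
    if (Src.Reg == VirtualFP)
      std::printf(".cfi_def_cfa_offset %d\n", -Src.Offset);
    else
      std::printf("// reg+offset CFA form not handled in this sketch\n");
  } else if (Src.IsReg && Src.Reg == VirtualFP) {
    // ...or the CFA base register changes (e.g. after "mov %rsp, %rbp").
    std::printf(".cfi_def_cfa_register r%d\n", Dst.Reg);
  } else {
    // A callee-saved register was spilled at an offset from the CFA.
    std::printf(".cfi_offset r%d, %d\n", Src.Reg, Dst.Offset);
  }
}

int main() {
  emitCFIFor({{true, VirtualFP, 0},    {false, VirtualFP, -16}}); // push grows the CFA offset
  emitCFIFor({{true, 6, 0},            {true, VirtualFP, 0}});    // CFA now based on r6 (rbp)
  emitCFIFor({{false, VirtualFP, -16}, {true, 6, 0}});            // r6 saved at CFA-16
  return 0;
}
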
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index df03168..c6166e2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -34,15 +34,47 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
-
+
// Remember if the buffer is nul terminated or not so we can avoid a copy.
bool isNullTerminated = Str.back() == 0;
if (isNullTerminated)
Str = Str.substr(0, Str.size()-1);
-
+
// If the output streamer is actually a .s file, just emit the blob textually.
// This is useful in case the asm parser doesn't handle something but the
// system assembler does.
@@ -50,18 +82,23 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
OutStreamer.EmitRawText(Str);
return;
}
-
+
SourceMgr SrcMgr;
-
+ SrcMgrDiagInfo DiagInfo;
+
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
- if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) {
- SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler,
- LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie);
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
HasDiagHandler = true;
}
-
+
MemoryBuffer *Buffer;
if (isNullTerminated)
Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
@@ -70,7 +107,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
-
+
OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
OutContext, OutStreamer,
*MAI));
@@ -92,15 +129,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
/// instruction that is an inline asm.
void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
-
+
unsigned NumOperands = MI->getNumOperands();
-
+
// Count the number of register definitions to find the asm string.
unsigned NumDefs = 0;
for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
++NumDefs)
assert(NumDefs != NumOperands-2 && "No asm string?");
-
+
assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
@@ -128,22 +165,23 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Get the !srcloc metadata node if we have it, and decode the loc cookie from
// it.
unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata())
- if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata())
- if (SrcLoc->getNumOperands() != 0)
- if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) {
- LocCookie = CI->getZExtValue();
- break;
- }
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
}
-
+
// Emit the inline asm to a temporary string so we can emit it through
// EmitInlineAsm.
SmallString<256> StringData;
raw_svector_ostream OS(StringData);
-
+
OS << '\t';
// The variant of the current asmprinter.
@@ -151,7 +189,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
-
+
while (*LastEmitted) {
switch (*LastEmitted) {
default: {
@@ -199,18 +237,18 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
++LastEmitted; // consume ')' character.
if (CurVariant == -1)
OS << '}'; // this is gcc's behavior for } outside a variant
- else
+ else
CurVariant = -1;
break;
}
if (Done) break;
-
+
bool HasCurlyBraces = false;
if (*LastEmitted == '{') { // ${variable}
++LastEmitted; // Consume '{' character.
HasCurlyBraces = true;
}
-
+
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
@@ -221,25 +259,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (StrEnd == 0)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
+
std::string Val(StrStart, StrEnd);
PrintSpecial(MI, OS, Val.c_str());
LastEmitted = StrEnd+1;
break;
}
-
+
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
-
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
report_fatal_error("Bad $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
-
+
char Modifier[2] = { 0, 0 };
-
+
if (HasCurlyBraces) {
// If we have curly braces, check for a modifier character. This
// supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
@@ -248,25 +286,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (*LastEmitted == 0)
report_fatal_error("Bad ${:} expression in inline asm string: '" +
Twine(AsmStr) + "'");
-
+
Modifier[0] = *LastEmitted;
++LastEmitted; // Consume modifier character.
}
-
+
if (*LastEmitted != '}')
report_fatal_error("Bad ${} expression in inline asm string: '" +
Twine(AsmStr) + "'");
++LastEmitted; // Consume '}' character.
}
-
+
if (Val >= NumOperands-1)
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
-
+
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = 2;
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
bool Error = false;
@@ -310,8 +348,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
}
}
OS << '\n' << (char)0; // null terminate string.
- EmitInlineAsm(OS.str(), LocCookie);
-
+ EmitInlineAsm(OS.str(), LocMD);
+
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use EmitRawText.
if (OutStreamer.hasRawTextSupport())
@@ -335,7 +373,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
} else if (!strcmp(Code, "uid")) {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
-
+
// If this is a new LastFn instruction, bump the counter.
if (LastMI != MI || LastFn != getFunctionNumber()) {
++Counter;
@@ -349,7 +387,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
report_fatal_error(Msg.str());
- }
+ }
}
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
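
The new SrcMgrDiagHandler translates the asm parser's reported line number into one of the !srcloc cookies attached to the inline-asm call, falling back to operand 0 when the line is out of range. Here is a standalone sketch of that lookup, with a plain vector of integers standing in for the MDNode operands (an assumption of the sketch, not the MDNode API).

#include <cstdint>
#include <cstdio>
#include <vector>

// One cookie per line of the inline-asm blob; cookie 0 means "no !srcloc".
static uint64_t cookieForErrorLine(const std::vector<uint64_t> &LocInfo,
                                   unsigned DiagLineNo /*1-based*/) {
  if (LocInfo.empty())
    return 0;
  unsigned ErrorLine = DiagLineNo - 1;
  if (ErrorLine >= LocInfo.size())   // out of range: clamp to the first operand
    ErrorLine = 0;
  return LocInfo[ErrorLine];
}

int main() {
  std::vector<uint64_t> Cookies = {101, 102, 103};  // per-line source locations
  std::printf("%llu\n", (unsigned long long)cookieForErrorLine(Cookies, 2)); // 102
  std::printf("%llu\n", (unsigned long long)cookieForErrorLine(Cookies, 9)); // 101
  return 0;
}
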
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 0000000..68be2ee
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,138 @@
+//===-- CodeGen/AsmPrinter/DwarfCFIException.cpp - Dwarf Exception Impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+ {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ if (!shouldEmitTableModule)
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ // Begin eh frame section.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ // Emit references to all used personality functions
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i));
+ Asm->EmitReference(Personalities[i], PerEncoding);
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitTable = shouldEmitMoves = false;
+
+ // If any landing pads survive, we need an EH table.
+ shouldEmitTable = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ shouldEmitMoves =
+ !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ shouldEmitTableModule |= shouldEmitTable;
+
+ if (shouldEmitMoves) {
+ const TargetFrameLowering *TFL = Asm->TM.getFrameLowering();
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ TFL->getInitialFrameState(Moves);
+ Asm->EmitCFIFrameMoves(Moves);
+ Asm->EmitCFIFrameMoves(MMI->getFrameMoves());
+ }
+
+ if (!shouldEmitTable)
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Provide LSDA information.
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ if (LSDAEncoding != dwarf::DW_EH_PE_omit)
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+
+ // Indicate personality routine, if any.
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ if (PerEncoding != dwarf::DW_EH_PE_omit &&
+ MMI->getPersonalities()[MMI->getPersonalityIndex()])
+ Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality",
+ MMI->getPersonalityIndex()),
+ PerEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ if (shouldEmitMoves)
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitTable)
+ EmitExceptionTable();
+}
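
DwarfCFIException drives the streamer's .cfi_* directives instead of hand-emitting frame moves into .eh_frame: BeginFunction opens the FDE and records frame moves, advertises the LSDA and personality when a landing-pad table is needed, and EndFunction closes the FDE. A toy sketch of that per-function ordering follows; the ToyStreamer type and the label names are invented for illustration only.

#include <cstdio>

struct ToyStreamer {
  void StartProc()                { std::puts(".cfi_startproc"); }
  void Lsda(const char *Sym)      { std::printf(".cfi_lsda %s\n", Sym); }
  void Personality(const char *S) { std::printf(".cfi_personality %s\n", S); }
  void EndProc()                  { std::puts(".cfi_endproc"); }
};

int main() {
  ToyStreamer S;
  bool ShouldEmitMoves = true;   // function may unwind
  bool ShouldEmitTable = true;   // function has surviving landing pads

  if (ShouldEmitMoves)
    S.StartProc();               // BeginFunction: open the FDE; the initial and
                                 // per-function frame moves are emitted next
  if (ShouldEmitTable) {
    S.Lsda("Lexception0");       // where this function's exception table lives
    S.Personality("Lpersonality0");
  }
  // ... function body is emitted here ...
  if (ShouldEmitMoves)
    S.EndProc();                 // EndFunction: close the FDE
  return 0;
}
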
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c886a5e..5106d57 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,6 +16,7 @@
#include "DIE.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
+#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,12 +25,13 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
@@ -38,7 +40,7 @@
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Timer.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absense of debug location information explicit."),
cl::init(false));
+#ifndef NDEBUG
+STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number");
+#endif
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -507,8 +513,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
return;
unsigned Line = V.getLineNumber();
- unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
- V.getContext().getFilename());
+ if (Line == 0)
+ return;
+ unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -522,8 +529,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
return;
unsigned Line = G.getLineNumber();
- unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
- G.getContext().getFilename());
+ if (Line == 0)
+ return;
+ unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -542,8 +550,7 @@ void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
unsigned Line = SP.getLineNumber();
if (!SP.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
- SP.getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -557,10 +564,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (!Ty.getContext().Verify())
+ if (Line == 0 || !Ty.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
- Ty.getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(Ty.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -574,10 +580,11 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
return;
unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
StringRef FN = NS.getFilename();
- StringRef Dir = NS.getDirectory();
- unsigned FileID = GetOrCreateSourceID(Dir, FN);
+ unsigned FileID = GetOrCreateSourceID(FN);
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -588,8 +595,8 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
MachineLocation Location;
unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
Location.set(FrameReg, Offset);
if (DV->variableHasComplexAddress())
@@ -620,8 +627,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
if (Reg < 32) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
} else {
- Reg = Reg - dwarf::DW_OP_reg0;
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
@@ -760,8 +766,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
if (Reg < 32)
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
else {
- Reg = Reg - dwarf::DW_OP_reg0;
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
@@ -812,6 +817,15 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ if (RI->getFrameRegister(*Asm->MF) == Location.getReg()
+ && Location.getOffset()) {
+ // If variable offset is based in frame register then use fbreg.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ addBlock(Die, Attribute, 0, Block);
+ return;
+ }
+
if (Location.isReg()) {
if (Reg < 32) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
@@ -834,35 +848,28 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
}
/// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) {
assert (MO.isReg() && "Invalid machine operand!");
if (!MO.getReg())
return false;
MachineLocation Location;
Location.set(MO.getReg());
addAddress(Die, dwarf::DW_AT_location, Location);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) {
assert (MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
unsigned Imm = MO.getImm();
addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
@@ -883,11 +890,42 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
(unsigned char)0xFF & FltPtr[Start]);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI,
+ bool Unsigned) {
+ if (CI->getBitWidth() <= 64) {
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ CI->getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ CI->getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const APInt Val = CI->getValue();
+ const char *Ptr = (const char*)Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & Ptr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
/// addToContextOwner - Add Die into the list of its context owner's children.
void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
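
The new wide-constant path in addConstantValue walks the constant's raw bytes low-to-high on little-endian targets and high-to-low otherwise, emitting one DW_FORM_data1 per byte. A standalone sketch of that iteration pattern is below; the raw byte buffer is an assumption of the sketch, not the APInt representation.

#include <cstdint>
#include <cstdio>
#include <vector>

static void emitBytes(const std::vector<uint8_t> &Raw, bool LittleEndian) {
  int NumBytes = (int)Raw.size();
  int Incr  = LittleEndian ? 1 : -1;
  int Start = LittleEndian ? 0 : NumBytes - 1;
  int Stop  = LittleEndian ? NumBytes : -1;
  for (; Start != Stop; Start += Incr)
    std::printf(".byte 0x%02x\n", (unsigned)Raw[Start]);  // one DW_FORM_data1 each
}

int main() {
  // A 128-bit constant stored here low byte first.
  std::vector<uint8_t> Raw = {0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe,
                              0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01};
  emitBytes(Raw, /*LittleEndian=*/true);
  return 0;
}
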
@@ -898,8 +936,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
} else if (Context.isSubprogram()) {
- DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context),
- /*MakeDecl=*/false);
+ DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context));
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
@@ -1033,16 +1070,23 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIDescriptor RTy = Elements.getElement(0);
addType(&Buffer, DIType(RTy));
- // Add prototype flag.
- addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
-
+ bool isPrototyped = true;
// Add arguments.
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- addType(Arg, DIType(Ty));
- Buffer.addChild(Arg);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ Buffer.addChild(Arg);
+ }
}
+ // Add prototype flag.
+ if (isPrototyped)
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
case dwarf::DW_TAG_structure_type:
@@ -1060,8 +1104,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
for (unsigned i = 0; i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIE *ElemDie = NULL;
- if (Element.isSubprogram())
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
ElemDie = createSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ }
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
@@ -1094,6 +1151,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIDescriptor Context = CTy.getContext();
addToContextOwner(&Buffer, Context);
}
+
+ if (Tag == dwarf::DW_TAG_class_type) {
+ DIArray TParams = CTy.getTemplateParams();
+ unsigned N = TParams.getNumElements();
+ // Add template parameters.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+ }
break;
}
default:
@@ -1124,6 +1196,38 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
}
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ CompileUnit *TypeCU = getCompileUnit(TP);
+ DIE *ParamDIE = TypeCU->getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ CompileUnit *TVCU = getCompileUnit(TPV);
+ DIE *ParamDIE = TVCU->getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
int64_t L = SR.getLo();
@@ -1258,7 +1362,8 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
else if (DT.isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_private);
- else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1268,7 +1373,7 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
CompileUnit *SPCU = getCompileUnit(SP);
DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
@@ -1286,10 +1391,7 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
addSourceLine(SPDie, SP);
- // Add prototyped tag, if C or ObjC.
- unsigned Lang = SP.getCompileUnit().getLanguage();
- if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
- Lang == dwarf::DW_LANG_ObjC)
+ if (SP.isPrototyped())
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
@@ -1307,13 +1409,13 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
ContainingTypeMap.insert(std::make_pair(SPDie,
SP.getContainingType()));
}
- if (MakeDecl || !SP.isDefinition()) {
+ if (!SP.isDefinition()) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments. Do not add arguments for subprogram definition. They will
@@ -1603,6 +1705,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ else if (DIVariable(DV->getVariable()).isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
if (Scope->isAbstractScope()) {
DV->setDIE(VariableDie);
@@ -1625,7 +1729,6 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
DbgVariableToDbgInstMap.find(DV);
if (DVI != DbgVariableToDbgInstMap.end()) {
const MachineInstr *DVInsn = DVI->second;
- const MCSymbol *DVLabel = findVariableLabel(DV);
bool updated = false;
// FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
@@ -1637,20 +1740,17 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
updated = true;
} else
- updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+ updated = addRegisterAddress(VariableDie, RegOp);
}
else if (DVInsn->getOperand(0).isImm())
- updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated = addConstantValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isFPImm())
updated =
- addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
if (Location.getReg()) {
addAddress(VariableDie, dwarf::DW_AT_location, Location);
- if (DVLabel)
- addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- DVLabel);
updated = true;
}
}
@@ -1700,6 +1800,16 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
+ SmallVector <DIE *, 8> Children;
+  // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+ Children.push_back(Variable);
+ const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(Scopes[j]))
+ Children.push_back(Nested);
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
@@ -1715,26 +1825,19 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = updateSubprogramScopeDIE(DS);
}
- else
+ else {
+ // There is no need to emit empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
ScopeDIE = constructLexicalScopeDIE(Scope);
- if (!ScopeDIE) return NULL;
-
- // Add variables to scope.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
- if (VariableDIE)
- ScopeDIE->addChild(VariableDIE);
}
+
+ if (!ScopeDIE) return NULL;
- // Add nested scopes.
- const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
- for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
- // Define the Scope debug information entry.
- DIE *NestedDIE = constructScopeDIE(Scopes[j]);
- if (NestedDIE)
- ScopeDIE->addChild(NestedDIE);
- }
+ // Add children
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
if (DS.isSubprogram())
addPubTypes(DISubprogram(DS));
@@ -1746,37 +1849,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
- unsigned DId;
- assert (DirName.empty() == false && "Invalid directory name!");
- StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
- if (DI != DirectoryIdMap.end()) {
- DId = DI->getValue();
- } else {
- DId = DirectoryNames.size() + 1;
- DirectoryIdMap[DirName] = DId;
- DirectoryNames.push_back(DirName);
- }
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>");
- unsigned FId;
- StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
- if (FI != SourceFileIdMap.end()) {
- FId = FI->getValue();
- } else {
- FId = SourceFileNames.size() + 1;
- SourceFileIdMap[FileName] = FId;
- SourceFileNames.push_back(FileName);
- }
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
- DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
- SourceIdMap.find(std::make_pair(DId, FId));
- if (SI != SourceIdMap.end())
- return SI->second;
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
- unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0.
- SourceIdMap[std::make_pair(DId, FId)] = SrcId;
- SourceIds.push_back(std::make_pair(DId, FId));
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName);
return SrcId;
}
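
The rewritten GetOrCreateSourceID interns each file name once, hands out ids starting at 1 (DW_AT_decl_file cannot be 0), and prints the .file directive at the point of interning rather than in a separate pass over directory/file tables. A minimal stand-in using std::map instead of StringMap:

#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, unsigned> SourceIdMap;

static unsigned getOrCreateSourceID(std::string FileName) {
  if (FileName.empty())
    FileName = "<stdin>";                    // frontend gave no name
  auto It = SourceIdMap.find(FileName);
  if (It != SourceIdMap.end())
    return It->second;                       // already interned
  unsigned SrcId = (unsigned)SourceIdMap.size() + 1;
  SourceIdMap[FileName] = SrcId;
  std::printf("\t.file %u \"%s\"\n", SrcId, FileName.c_str());
  return SrcId;
}

int main() {
  getOrCreateSourceID("a.c");   // .file 1 "a.c"
  getOrCreateSourceID("b.c");   // .file 2 "b.c"
  getOrCreateSourceID("a.c");   // reused, nothing printed
  return 0;
}
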
@@ -1802,7 +1889,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
StringRef Dir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(Dir, FN);
+ unsigned ID = GetOrCreateSourceID(FN);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
@@ -1886,6 +1973,32 @@ static bool isUnsignedDIType(DIType Ty) {
return false;
}
+// Return constant expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global value.
+ if (!isa<GlobalValue>(CE->getOperand(0)))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
/// constructGlobalVariableDIE - Construct global variable DIE.
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
DIGlobalVariable GV(N);
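
getMergedGlobalExpr accepts only a three-operand constant GEP whose base is a global and whose first index is zero, i.e. the @_MergedGlobals pattern shown in its comment. A toy sketch of the same shape check with invented stand-in types (not ConstantExpr):

#include <cstdio>
#include <vector>

enum OpKind { Global, ConstInt, Other };
struct Operand { OpKind Kind; long Value; };
struct Expr { bool IsGEP; std::vector<Operand> Ops; };

static bool looksLikeMergedGlobalAccess(const Expr &E) {
  if (!E.IsGEP || E.Ops.size() != 3) return false;
  if (E.Ops[0].Kind != Global) return false;                          // base is a global
  if (E.Ops[1].Kind != ConstInt || E.Ops[1].Value != 0) return false; // first index is 0
  return E.Ops[2].Kind == ConstInt;                                   // offset into the blob
}

int main() {
  // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 2)
  Expr E{true, {{Global, 0}, {ConstInt, 0}, {ConstInt, 2}}};
  std::printf("%s\n", looksLikeMergedGlobalAccess(E) ? "merged global" : "no");
  return 0;
}
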
@@ -1952,16 +2065,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (Constant *C = GV.getConstant()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (isUnsignedDIType(GTy))
- addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
- CI->getZExtValue());
- else
- addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
- CI->getSExtValue());
- }
+ } else if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+ ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+
return;
}
@@ -2043,25 +2162,12 @@ void DwarfDebug::beginModule(Module *M) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
-
- // Print out .file directives to specify files for .loc directives. These are
- // printed out early so that they precede any .loc directives.
- if (Asm->MAI->hasDotLocAndDotFile()) {
- for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
- // Remember source id starts at 1.
- std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
- // FIXME: don't use sys::path for this! This should not depend on the
- // host.
- sys::Path FullPath(getSourceDirectoryName(Id.first));
- bool AppendOk =
- FullPath.appendComponent(getSourceFileName(Id.second));
- assert(AppendOk && "Could not append filename to directory!");
- AppendOk = false;
- Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
- }
- }
}
/// endModule - Emit all Dwarf sections that should come after the content.
@@ -2081,8 +2187,7 @@ void DwarfDebug::endModule() {
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+ NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
if (!NMD) continue;
unsigned E = NMD->getNumOperands();
if (!E) continue;
@@ -2152,9 +2257,6 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2242,15 +2344,6 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
}
}
-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in undefined reg.
-static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
- assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
- if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
- return true;
- return false;
-}
-
/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
@@ -2275,7 +2368,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
+ if (!MInsn->isDebugValue())
continue;
DbgValues.push_back(MInsn);
}
@@ -2297,19 +2390,18 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
ME = DbgValues.end(); MI != ME; ++MI) {
const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ if (Var == DV &&
!PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
PrevMI = *MI;
}
- DbgScope *Scope = findDbgScope(MInsn);
- bool CurFnArg = false;
+ DbgScope *Scope = NULL;
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
- CurFnArg = true;
- if (!Scope && CurFnArg)
Scope = CurrentFnDbgScope;
+ else
+ Scope = findDbgScope(MInsn);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -2317,8 +2409,6 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
Processed.insert(DV);
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
- if (!CurFnArg)
- DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2375,10 +2465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
// Collect info for variables that were optimized out.
const Function *F = MF->getFunction();
- const Module *M = F->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(F->getName())))) {
+ if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
@@ -2409,8 +2496,8 @@ const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
return I->second;
}
-/// beginScope - Process beginning of a scope.
-void DwarfDebug::beginScope(const MachineInstr *MI) {
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (InsnNeedsLabel.count(MI) == 0) {
LabelsBeforeInsn[MI] = PrevLabel;
return;
@@ -2444,8 +2531,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
assert (0 && "Instruction is not processed!");
}
-/// endScope - Process end of a scope.
-void DwarfDebug::endScope(const MachineInstr *MI) {
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
if (InsnsEndScopeSet.count(MI) != 0) {
// Emit a label if this instruction ends a scope.
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
@@ -2624,6 +2711,10 @@ bool DwarfDebug::extractScopeInformation() {
continue;
}
+ // Ignore DBG_VALUE. It does not contribute any instruction in output.
+ if (MInsn->isDebugValue())
+ continue;
+
if (RangeBeginMI) {
// If we have already seen the beginning of an instruction range and
// current instruction scope does not match scope of first instruction
@@ -2727,12 +2818,37 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
return DebugLoc();
}
+#ifndef NDEBUG
+/// CheckLineNumbers - Count basicblocks whose instructions do not have any
+/// line number information.
+static void CheckLineNumbers(const MachineFunction *MF) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool FoundLineNo = false;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+ if (!MI->getDebugLoc().isUnknown()) {
+ FoundLineNo = true;
+ break;
+ }
+ }
+ if (!FoundLineNo && I->size())
+ ++BlocksWithoutLineNo;
+ }
+}
+#endif
+
/// beginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
if (!extractScopeInformation()) return;
+#ifndef NDEBUG
+ CheckLineNumbers(MF);
+#endif
+
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
@@ -2775,16 +2891,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
// If DBG_VALUE is for a local variable then it needs a label.
- if (DV.getTag() != dwarf::DW_TAG_arg_variable
- && isDbgValueInUndefinedReg(MI) == false)
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
InsnNeedsLabel.insert(MI);
// DBG_VALUE for inlined functions argument needs a label.
else if (!DISubprogram(getDISubprogram(DV.getContext())).
describes(MF->getFunction()))
InsnNeedsLabel.insert(MI);
// DBG_VALUE indicating argument location change needs a label.
- else if (isDbgValueInUndefinedReg(MI) == false
- && !ProcessedArgs.insert(DV))
+ else if (!ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
// If location is unknown then instruction needs a location only if
@@ -2820,17 +2934,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
- // Get function line info.
- if (!Lines.empty()) {
- // Get section line info.
- unsigned ID = SectionMap.insert(Asm->getCurrentSection());
- if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
- std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
- // Append the function info to section info.
- SectionLineInfos.insert(SectionLineInfos.end(),
- Lines.begin(), Lines.end());
- }
-
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2840,10 +2943,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- const Module *M = MF->getFunction()->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(FName)))) {
+ if (NamedMDNode *NMD =
+ getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !ProcessedVars.insert(DV))
@@ -2875,7 +2976,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
- DbgVariableLabelsMap.clear();
DeleteContainerSeconds(DbgScopeMap);
InsnsEndScopeSet.clear();
ConcreteScopes.clear();
@@ -2884,7 +2984,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
AbstractVariables.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
- Lines.clear();
PrevLabel = NULL;
}
@@ -2906,15 +3005,6 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
return true;
}
-/// findVariableLabel - Find MCSymbol for the variable.
-const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
- DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
- = DbgVariableLabelsMap.find(V);
- if (I == DbgVariableLabelsMap.end())
- return NULL;
- else return I->second;
-}
-
/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
@@ -2940,7 +3030,6 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// the source line list.
MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
const MDNode *S) {
- StringRef Dir;
StringRef Fn;
unsigned Src = 1;
@@ -2949,25 +3038,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
if (Scope.isCompileUnit()) {
DICompileUnit CU(S);
- Dir = CU.getDirectory();
Fn = CU.getFilename();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
} else if (Scope.isSubprogram()) {
DISubprogram SP(S);
- Dir = SP.getDirectory();
Fn = SP.getFilename();
} else if (Scope.isLexicalBlock()) {
DILexicalBlock DB(S);
- Dir = DB.getDirectory();
Fn = DB.getFilename();
} else
assert(0 && "Unexpected scope info");
- Src = GetOrCreateSourceID(Dir, Fn);
+ Src = GetOrCreateSourceID(Fn);
}
- MCSymbol *Label = MMI->getContext().CreateTempSymbol();
- Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT,
+ 0, 0);
+ MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Asm->OutStreamer.EmitLabel(Label);
return Label;
}
@@ -3151,6 +3241,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
Values[i]->EmitValue(Asm, Form);
break;
}
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
default:
// Emit an attribute using the defined form.
Values[i]->EmitValue(Asm, Form);
@@ -3270,185 +3368,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitDebugLines - Emit source line information.
-///
-void DwarfDebug::emitDebugLines() {
- // If the target is using .loc/.file, the assembler will be emitting the
- // .debug_line table automatically.
- if (Asm->MAI->hasDotLocAndDotFile())
- return;
-
- // Minimum line delta, thus ranging from -10..(255-10).
- const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
- // Maximum line delta, thus ranging from -10..(255-10).
- const int MaxLineDelta = 255 + MinLineDelta;
-
- // Start the dwarf line section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfLineSection());
-
- // Construct the section header.
- Asm->OutStreamer.AddComment("Length of Source Line Info");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
- Asm->GetTempSymbol("line_begin"), 4);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
-
- Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
- Asm->OutStreamer.AddComment("Prolog Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
- Asm->GetTempSymbol("line_prolog_begin"), 4);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin"));
-
- Asm->OutStreamer.AddComment("Minimum Instruction Length");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("Default is_stmt_start flag");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)");
- Asm->EmitInt8(MinLineDelta);
- Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)");
- Asm->EmitInt8(MaxLineDelta);
- Asm->OutStreamer.AddComment("Special Opcode Base");
- Asm->EmitInt8(-MinLineDelta);
-
- // Line number standard opcode encodings argument count
- Asm->OutStreamer.AddComment("DW_LNS_copy arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_set_file arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_set_column arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count");
- Asm->EmitInt8(1);
-
- // Emit directories.
- for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
- const std::string &Dir = getSourceDirectoryName(DI);
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory");
- Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0);
- }
-
- Asm->OutStreamer.AddComment("End of directories");
- Asm->EmitInt8(0);
-
- // Emit files.
- for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
- // Remember source id starts at 1.
- std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
- const std::string &FN = getSourceFileName(Id.second);
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
- Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-
- Asm->EmitULEB128(Id.first, "Directory #");
- Asm->EmitULEB128(0, "Mod date");
- Asm->EmitULEB128(0, "File size");
- }
-
- Asm->OutStreamer.AddComment("End of files");
- Asm->EmitInt8(0);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end"));
-
- // A sequence for each text section.
- unsigned SecSrcLinesSize = SectionSourceLines.size();
-
- for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
- // Isolate current sections line info.
- const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
-
- // Dwarf assumes we start with first line of first source file.
- unsigned Source = 1;
- unsigned Line = 1;
-
- // Construct rows of the address, source, line, column matrix.
- for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
- const SrcLineInfo &LineInfo = LineInfos[i];
- MCSymbol *Label = LineInfo.getLabel();
- if (!Label->isDefined()) continue; // Not emitted, in dead code.
-
- if (Asm->isVerbose()) {
- std::pair<unsigned, unsigned> SrcID =
- getSourceDirectoryAndFileIds(LineInfo.getSourceID());
- Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) +
- "/" +
- Twine(getSourceFileName(SrcID.second)) +
- ":" + Twine(LineInfo.getLine()));
- }
-
- // Define the line address.
- Asm->OutStreamer.AddComment("Extended Op");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
-
- Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
- Asm->OutStreamer.AddComment("Location label");
- Asm->OutStreamer.EmitSymbolValue(Label,
- Asm->getTargetData().getPointerSize(),
- 0/*AddrSpace*/);
-
- // If change of source, then switch to the new source.
- if (Source != LineInfo.getSourceID()) {
- Source = LineInfo.getSourceID();
- Asm->OutStreamer.AddComment("DW_LNS_set_file");
- Asm->EmitInt8(dwarf::DW_LNS_set_file);
- Asm->EmitULEB128(Source, "New Source");
- }
-
- // If change of line.
- if (Line != LineInfo.getLine()) {
- // Determine offset.
- int Offset = LineInfo.getLine() - Line;
- int Delta = Offset - MinLineDelta;
-
- // Update line.
- Line = LineInfo.getLine();
-
- // If delta is small enough and in range...
- if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
- // ... then use fast opcode.
- Asm->OutStreamer.AddComment("Line Delta");
- Asm->EmitInt8(Delta - MinLineDelta);
- } else {
- // ... otherwise use long hand.
- Asm->OutStreamer.AddComment("DW_LNS_advance_line");
- Asm->EmitInt8(dwarf::DW_LNS_advance_line);
- Asm->EmitSLEB128(Offset, "Line Offset");
- Asm->OutStreamer.AddComment("DW_LNS_copy");
- Asm->EmitInt8(dwarf::DW_LNS_copy);
- }
- } else {
- // Copy the previous row (different address or source)
- Asm->OutStreamer.AddComment("DW_LNS_copy");
- Asm->EmitInt8(dwarf::DW_LNS_copy);
- }
- }
-
- emitEndOfLineMatrix(j + 1);
- }
-
- if (SecSrcLinesSize == 0)
- // Because we're emitting a debug_line section, we still need a line
- // table. The linker and friends expect it to exist. If there's nothing to
- // put into it, emit an empty table.
- emitEndOfLineMatrix(1);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end"));
-}
-
/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
void DwarfDebug::emitCommonDebugFrame() {
@@ -3456,8 +3375,8 @@ void DwarfDebug::emitCommonDebugFrame() {
return;
int stackGrowth = Asm->getTargetData().getPointerSize();
- if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsDown)
+ if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown)
stackGrowth *= -1;
// Start the dwarf frame section.
@@ -3480,10 +3399,11 @@ void DwarfDebug::emitCommonDebugFrame() {
Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
Asm->OutStreamer.AddComment("CIE RA Column");
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
+ TFI->getInitialFrameState(Moves);
Asm->EmitFrameMoves(Moves, 0, false);
@@ -3667,6 +3587,14 @@ void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
@@ -3676,7 +3604,8 @@ void DwarfDebug::emitDebugLoc() {
for (SmallVector<DotDebugLocEntry, 4>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
- DotDebugLocEntry Entry = *I;
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
if (Entry.isEmpty()) {
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index f0ff3bc..7df0510 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
@@ -51,6 +52,8 @@ class DIType;
class DINameSpace;
class DISubrange;
class DICompositeType;
+class DITemplateTypeParameter;
+class DITemplateValueParameter;
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
@@ -71,6 +74,28 @@ public:
MCSymbol *getLabel() const { return Label; }
};
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ bool Merged;
+ DotDebugLocEntry() : Begin(0), End(0), Merged(false) {}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L)
+ : Begin(B), End(E), Loc(L), Merged(false) {}
+  /// Empty entries are also used as a trigger to emit a temp label. Such
+  /// labels are used to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+} DotDebugLocEntry;
+
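The Merge hook added above lets emitDebugLoc coalesce adjacent location-list entries that describe the same location and abut; here is a minimal illustration of the idea on a hypothetical plain range type (not the DwarfDebug types themselves).

    // Sketch: fold [Begin,End) into the following entry when both describe the
    // same location and the ranges are contiguous.
    struct Range { int Begin, End, Loc; bool Merged; };

    static void mergeWithNext(Range &Cur, Range &Next) {
      if (Cur.Loc != Next.Loc || Cur.End != Next.Begin)
        return;               // different location or a hole: keep both entries
      Next.Begin = Cur.Begin; // grow the next entry backwards
      Cur.Merged = true;      // and skip the current one at emission time
    }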
class DwarfDebug {
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
@@ -93,30 +118,9 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// DirectoryIdMap - Directory name to directory id map.
- ///
- StringMap<unsigned> DirectoryIdMap;
-
- /// DirectoryNames - A list of directory names.
- SmallVector<std::string, 8> DirectoryNames;
-
- /// SourceFileIdMap - Source file name to source file id map.
- ///
- StringMap<unsigned> SourceFileIdMap;
-
- /// SourceFileNames - A list of source file names.
- SmallVector<std::string, 8> SourceFileNames;
-
/// SourceIdMap - Source id map, i.e. pair of directory id and source file
/// id mapped to a unique id.
- DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
-
- /// SourceIds - Reverse map from source id to directory id + file id pair.
- ///
- SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
-
- /// Lines - List of source line correspondence.
- std::vector<SrcLineInfo> Lines;
+ StringMap<unsigned> SourceIdMap;
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -135,10 +139,6 @@ class DwarfDebug {
///
UniqueVector<const MCSection*> SectionMap;
- /// SectionSourceLines - Tracks line numbers per text section.
- ///
- std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
-
// CurrentFnDbgScope - Top level scope for the current function.
//
DbgScope *CurrentFnDbgScope;
@@ -175,23 +175,6 @@ class DwarfDebug {
/// machine instruction.
DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
- /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
- DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
-
- /// DotDebugLocEntry - This struct describes location entries emitted in
- /// .debug_loc section.
- typedef struct DotDebugLocEntry {
- const MCSymbol *Begin;
- const MCSymbol *End;
- MachineLocation Loc;
- DotDebugLocEntry() : Begin(0), End(0) {}
- DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
- MachineLocation &L) : Begin(B), End(E), Loc(L) {}
- /// Empty entries are also used as a trigger to emit temp label. Such
- /// labels are referenced is used to find debug_loc offset for a given DIE.
- bool isEmpty() { return Begin == 0 && End == 0; }
- } DotDebugLocEntry;
-
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
@@ -265,35 +248,10 @@ class DwarfDebug {
DIEInteger *DIEIntegerOne;
private:
-
- /// getSourceDirectoryAndFileIds - Return the directory and file ids that
- /// maps to the source id. Source id starts at 1.
- std::pair<unsigned, unsigned>
- getSourceDirectoryAndFileIds(unsigned SId) const {
- return SourceIds[SId-1];
- }
-
- /// getNumSourceDirectories - Return the number of source directories in the
- /// debug info.
- unsigned getNumSourceDirectories() const {
- return DirectoryNames.size();
- }
-
- /// getSourceDirectoryName - Return the name of the directory corresponding
- /// to the id.
- const std::string &getSourceDirectoryName(unsigned Id) const {
- return DirectoryNames[Id - 1];
- }
-
- /// getSourceFileName - Return the name of the source file corresponding
- /// to the id.
- const std::string &getSourceFileName(unsigned Id) const {
- return SourceFileNames[Id - 1];
- }
/// getNumSourceIds - Return the number of unique source ids.
unsigned getNumSourceIds() const {
- return SourceIds.size();
+ return SourceIdMap.size();
}
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -349,13 +307,14 @@ private:
const MachineLocation &Location);
/// addRegisterAddress - Add register location entry in variable DIE.
- bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addRegisterAddress(DIE *Die, const MachineOperand &MO);
/// addConstantValue - Add constant value entry in variable DIE.
- bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addConstantValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
- bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
@@ -393,6 +352,14 @@ private:
/// given DIType.
DIE *getOrCreateTypeDIE(DIType Ty);
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
void addPubTypes(DISubprogram SP);
/// constructTypeDIE - Construct basic type die from DIBasicType.
@@ -421,7 +388,7 @@ private:
DIE *createMemberDIE(DIDerivedType DT);
/// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
+ DIE *createSubprogramDIE(DISubprogram SP);
/// getOrCreateDbgScope - Create DbgScope for the scope.
DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -481,10 +448,6 @@ private:
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugLines - Emit source line information.
- ///
- void emitDebugLines();
-
/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
void emitCommonDebugFrame();
@@ -543,9 +506,8 @@ private:
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
- /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
- /// maps as well.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
+ /// in the SourceIds map.
+ unsigned GetOrCreateSourceID(StringRef FullName);
/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
@@ -565,12 +527,6 @@ private:
/// the source line list.
MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
- /// getSourceLineCount - Return the number of source lines in the debug
- /// info.
- unsigned getSourceLineCount() const {
- return Lines.size();
- }
-
/// recordVariableFrameIndex - Record a variable's index.
void recordVariableFrameIndex(const DbgVariable *V, int Index);
@@ -578,9 +534,6 @@ private:
/// is found. Update FI to hold value of the index.
bool findVariableFrameIndex(const DbgVariable *V, int *FI);
- /// findVariableLabel - Find MCSymbol for the variable.
- const MCSymbol *findVariableLabel(const DbgVariable *V);
-
/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *findDbgScope(const MachineInstr *MI);
@@ -630,11 +583,11 @@ public:
/// getLabelAfterInsn - Return Label immediately following the instruction.
const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// beginScope - Process beginning of a scope.
- void beginScope(const MachineInstr *MI);
+ /// beginInstruction - Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
- /// endScope - Prcess end of a scope.
- void endScope(const MachineInstr *MI);
+  /// endInstruction - Process end of an instruction.
+ void endInstruction(const MachineInstr *MI);
};
} // End of namespace llvm
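The header changes above collapse the old directory/file name tables and the id-pair maps into a single map keyed by the full file name, which is what the new GetOrCreateSourceID(StringRef FullName) consults. Here is a small sketch of that scheme using standard containers; the helper name and signature are hypothetical, not the DwarfDebug member.

    #include <map>
    #include <string>

    // Sketch: source ids start at 1; an already-seen name returns its old id.
    static unsigned getOrCreateSourceID(std::map<std::string, unsigned> &SourceIdMap,
                                        const std::string &FullName) {
      unsigned &Id = SourceIdMap[FullName];               // 0 when newly inserted
      if (Id == 0)
        Id = static_cast<unsigned>(SourceIdMap.size());   // next unique id
      return Id;
    }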
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 86a3688..967a278 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -26,7 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -39,238 +39,10 @@
using namespace llvm;
DwarfException::DwarfException(AsmPrinter *A)
- : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
- shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+ : Asm(A), MMI(Asm->MMI) {}
DwarfException::~DwarfException() {}
-/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
-/// is shared among many Frame Description Entries. There is at least one CIE
-/// in every non-empty .debug_frame section.
-void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
- // Size and sign of stack growth.
- int stackGrowth = Asm->getTargetData().getPointerSize();
- if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsDown)
- stackGrowth *= -1;
-
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Begin eh frame section.
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
- MCSymbol *EHFrameSym;
- if (TLOF.isFunctionEHFrameSymbolPrivate())
- EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
- else
- EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
- Twine(Index));
- Asm->OutStreamer.EmitLabel(EHFrameSym);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
-
- // Define base labels.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
-
- // Define the eh frame length.
- Asm->OutStreamer.AddComment("Length of Common Information Entry");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
- Asm->GetTempSymbol("eh_frame_common_begin", Index),
- 4);
-
- // EH frame header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
- Asm->OutStreamer.AddComment("CIE Identifier Tag");
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
- Asm->OutStreamer.AddComment("DW_CIE_VERSION");
- Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
-
- // The personality presence indicates that language specific information will
- // show up in the eh frame. Find out how we are supposed to lower the
- // personality function reference:
-
- unsigned LSDAEncoding = TLOF.getLSDAEncoding();
- unsigned FDEEncoding = TLOF.getFDEEncoding();
- unsigned PerEncoding = TLOF.getPersonalityEncoding();
-
- char Augmentation[6] = { 0 };
- unsigned AugmentationSize = 0;
- char *APtr = Augmentation + 1;
-
- if (PersonalityFn) {
- // There is a personality function.
- *APtr++ = 'P';
- AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
- }
-
- if (UsesLSDA[Index]) {
- // An LSDA pointer is in the FDE augmentation.
- *APtr++ = 'L';
- ++AugmentationSize;
- }
-
- if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
- // A non-default pointer encoding for the FDE.
- *APtr++ = 'R';
- ++AugmentationSize;
- }
-
- if (APtr != Augmentation + 1)
- Augmentation[0] = 'z';
-
- Asm->OutStreamer.AddComment("CIE Augmentation");
- Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
-
- // Round out reader.
- Asm->EmitULEB128(1, "CIE Code Alignment Factor");
- Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
- Asm->OutStreamer.AddComment("CIE Return Address Column");
-
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
-
- if (Augmentation[0]) {
- Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
-
- // If there is a personality, we need to indicate the function's location.
- if (PersonalityFn) {
- Asm->EmitEncodingByte(PerEncoding, "Personality");
- Asm->OutStreamer.AddComment("Personality");
- Asm->EmitReference(PersonalityFn, PerEncoding);
- }
- if (UsesLSDA[Index])
- Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
- if (FDEEncoding != dwarf::DW_EH_PE_absptr)
- Asm->EmitEncodingByte(FDEEncoding, "FDE");
- }
-
- // Indicate locations of general callee saved registers in frame.
- std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
- Asm->EmitFrameMoves(Moves, 0, true);
-
- // On Darwin the linker honors the alignment of eh_frame, which means it must
- // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
- // holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
-}
-
-/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
-void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
- assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
- "Should not emit 'available externally' functions at all");
-
- const Function *TheFunc = EHFrameInfo.function;
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- unsigned LSDAEncoding = TLOF.getLSDAEncoding();
- unsigned FDEEncoding = TLOF.getFDEEncoding();
-
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
- // Externally visible entry into the functions eh frame info. If the
- // corresponding function is static, this should not be externally visible.
- if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
-
- // If corresponding function is weak definition, this should be too.
- if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_WeakDefinition);
-
- // If corresponding function is hidden, this should be too.
- if (TheFunc->hasHiddenVisibility())
- if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- HiddenAttr);
-
- // If there are no calls then you can't unwind. This may mean we can omit the
- // EH Frame, but some environments do not handle weak absolute symbols. If
- // UnwindTablesMandatory is set we cannot do this optimization; the unwind
- // info is to be available for non-EH uses.
- if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
- (!TheFunc->isWeakForLinker() ||
- !Asm->MAI->getWeakDefDirective() ||
- TLOF.getSupportsWeakOmittedEHFrame())) {
- Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
- MCConstantExpr::Create(0, Asm->OutContext));
- // This name has no connection to the function, so it might get
- // dead-stripped when the function is not, erroneously. Prohibit
- // dead-stripping unconditionally.
- if (Asm->MAI->hasNoDeadStrip())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_NoDeadStrip);
- } else {
- Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
-
- // EH frame header.
- Asm->OutStreamer.AddComment("Length of Frame Information Entry");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
- Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
- EHFrameInfo.Number));
-
- Asm->OutStreamer.AddComment("FDE CIE offset");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
- Asm->GetTempSymbol("eh_frame_common",
- EHFrameInfo.PersonalityIndex), 4);
-
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
-
- Asm->OutStreamer.AddComment("FDE initial location");
- Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
-
- Asm->OutStreamer.AddComment("FDE address range");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
- EHFrameInfo.Number),
- EHFuncBeginSym,
- Asm->GetSizeOfEncodedValue(FDEEncoding));
-
- // If there is a personality and landing pads then point to the language
- // specific data area in the exception table.
- if (MMI->getPersonalities()[0] != NULL) {
- unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
-
- Asm->EmitULEB128(Size, "Augmentation size");
- Asm->OutStreamer.AddComment("Language Specific Data Area");
- if (EHFrameInfo.hasLandingPads)
- Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
- LSDAEncoding);
- else
- Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
-
- } else {
- Asm->EmitULEB128(0, "Augmentation size");
- }
-
- // Indicate locations of function specific callee saved registers in frame.
- Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
-
- // On Darwin the linker honors the alignment of eh_frame, which means it
- // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
- // get holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
- EHFrameInfo.Number));
-
- // If the function is marked used, this table should be also. We cannot
- // make the mark unconditional in this case, since retaining the table also
- // retains the function in this case, and there is code around that depends
- // on unused functions (calling undefined externals) being dead-stripped to
- // link correctly. Yes, there really is.
- if (MMI->isUsedFunction(EHFrameInfo.function))
- if (Asm->MAI->hasNoDeadStrip())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_NoDeadStrip);
- }
- Asm->OutStreamer.AddBlankLine();
-}
-
/// SharedTypeIds - How many leading type ids two landing pads have in common.
unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
const LandingPadInfo *R) {
@@ -422,7 +194,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(I);
if (!MO.isGlobal()) continue;
-
+
const Function *F = dyn_cast<Function>(MO.getGlobal());
if (F == 0) continue;
@@ -430,7 +202,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
// Be conservative. If we have more than one function operand for this
// call, then we can't make the assumption that it's the callee and
// not a parameter to the call.
- //
+ //
// FIXME: Determine if there's a way to say that `F' is the callee or
// parameter.
MarkedNoUnwind = false;
@@ -497,8 +269,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -520,8 +291,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
};
// Try to merge with the previous call-site. SJLJ doesn't do this
- if (PreviousIsInvoke &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry &Prev = CallSites.back();
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
@@ -531,7 +301,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
// Otherwise, create a new call-site.
- if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+ if (Asm->MAI->isExceptionHandlingDwarf())
CallSites.push_back(Site);
else {
// SjLj EH must maintain the call sites in the order assigned
@@ -549,8 +319,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry Site = { LastLabel, 0, 0, 0 };
CallSites.push_back(Site);
}
@@ -620,7 +389,7 @@ void DwarfException::EmitExceptionTable() {
// Call sites.
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
-
+
unsigned CallSiteTableLength;
if (IsSJLJ)
CallSiteTableLength = 0;
@@ -628,7 +397,7 @@ void DwarfException::EmitExceptionTable() {
unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
- CallSiteTableLength =
+ CallSiteTableLength =
CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
}
@@ -656,15 +425,15 @@ void DwarfException::EmitExceptionTable() {
// mode, this reference will require a relocation by the dynamic linker.
//
// Because of this, we have a couple of options:
- //
+ //
// 1) If we are in -static mode, we can always use an absolute reference
// from the LSDA, because the static linker will resolve it.
- //
+ //
// 2) Otherwise, if the LSDA section is writable, we can output the direct
// reference to the typeinfo and allow the dynamic linker to relocate
// it. Since it is in a writable section, the dynamic linker won't
// have a problem.
- //
+ //
// 3) Finally, if we're in PIC mode and the LDSA section isn't writable,
// we need to use some form of indirection. For example, on Darwin,
// we can output a statically-relocatable reference to a dyld stub. The
@@ -682,11 +451,14 @@ void DwarfException::EmitExceptionTable() {
}
// Begin the exception table.
- Asm->OutStreamer.SwitchSection(LSDASection);
+  // Sometimes we do not want to emit the data into a separate section (e.g. ARM
+  // EHABI); in that case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
Asm->EmitAlignment(2);
// Emit the LSDA.
- MCSymbol *GCCETSym =
+ MCSymbol *GCCETSym =
Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer.EmitLabel(GCCETSym);
@@ -764,7 +536,7 @@ void DwarfException::EmitExceptionTable() {
}
} else {
// DWARF Exception handling
- assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+ assert(Asm->MAI->isExceptionHandlingDwarf());
// The call-site table is a list of all call sites that may throw an
// exception (including C++ 'throw' statements) in the procedure
@@ -793,23 +565,23 @@ void DwarfException::EmitExceptionTable() {
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
-
+
MCSymbol *EHFuncBeginSym =
Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
-
+
MCSymbol *BeginLabel = S.BeginLabel;
if (BeginLabel == 0)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
if (EndLabel == 0)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-
+
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
Asm->OutStreamer.AddComment("Region start");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
+
Asm->OutStreamer.AddComment("Region length");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
@@ -834,7 +606,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment("-- Action Record Table --");
Asm->OutStreamer.AddBlankLine();
}
-
+
for (SmallVectorImpl<ActionEntry>::const_iterator
I = Actions.begin(), E = Actions.end(); I != E; ++I) {
const ActionEntry &Action = *I;
@@ -888,73 +660,17 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
- if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
- return;
-
- if (!shouldEmitMovesModule && !shouldEmitTableModule)
- return;
-
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
-
- for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
- EmitCIE(Personalities[I], I);
-
- for (std::vector<FunctionEHFrameInfo>::iterator
- I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
- EmitFDE(*I);
+ assert(0 && "Should be implemented");
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- shouldEmitTable = shouldEmitMoves = false;
-
- // If any landing pads survive, we need an EH table.
- shouldEmitTable = !MMI->getLandingPads().empty();
-
- // See if we need frame move info.
- shouldEmitMoves =
- !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
-
- if (shouldEmitMoves || shouldEmitTable)
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
-
- shouldEmitTableModule |= shouldEmitTable;
- shouldEmitMovesModule |= shouldEmitMoves;
+ assert(0 && "Should be implemented");
}
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- if (!shouldEmitMoves && !shouldEmitTable) return;
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
- // Record if this personality index uses a landing pad.
- bool HasLandingPad = !MMI->getLandingPads().empty();
- UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
- if (HasLandingPad)
- EmitExceptionTable();
-
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- MCSymbol *FunctionEHSym =
- Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
- TLOF.isFunctionEHFrameSymbolPrivate());
-
- // Save EH frame information
- EHFrames.
- push_back(FunctionEHFrameInfo(FunctionEHSym,
- Asm->getFunctionNumber(),
- MMI->getPersonalityIndex(),
- Asm->MF->getFrameInfo()->adjustsStack(),
- !MMI->getLandingPads().empty(),
- MMI->getFrameMoves(),
- Asm->MF->getFunction()));
+ assert(0 && "Should be implemented");
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index bc311e6..a172e53 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -35,60 +35,13 @@ class AsmPrinter;
/// DwarfException - Emits Dwarf exception handling directives.
///
class DwarfException {
+protected:
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
- struct FunctionEHFrameInfo {
- MCSymbol *FunctionEHSym; // L_foo.eh
- unsigned Number;
- unsigned PersonalityIndex;
- bool adjustsStack;
- bool hasLandingPads;
- std::vector<MachineMove> Moves;
- const Function *function;
-
- FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
- bool hC, bool hL,
- const std::vector<MachineMove> &M,
- const Function *f):
- FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
- adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
- };
-
- std::vector<FunctionEHFrameInfo> EHFrames;
-
- /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
- /// uses an LSDA. If so, then we need to encode that information in the CIE's
- /// augmentation.
- DenseMap<unsigned, bool> UsesLSDA;
-
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
-
- /// shouldEmitFrameModule - Per-module flag to indicate if frame moves
- /// should be emitted.
- bool shouldEmitMovesModule;
-
- /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
- /// that is shared among many Frame Description Entries. There is at least
- /// one CIE in every non-empty .debug_frame section.
- void EmitCIE(const Function *Personality, unsigned Index);
-
- /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
- void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
-
/// EmitExceptionTable - Emit landing pads and actions.
///
/// The general organization of the table is complex, but the basic concepts
@@ -172,18 +125,116 @@ public:
// Main entry points.
//
DwarfException(AsmPrinter *A);
- ~DwarfException();
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes it's
+  /// being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes it's
+  /// being emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfTableException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+
+ /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+ /// should be emitted.
+ bool shouldEmitMovesModule;
+
+ struct FunctionEHFrameInfo {
+ MCSymbol *FunctionEHSym; // L_foo.eh
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool adjustsStack;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+ const Function *function;
+
+ FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M,
+ const Function *f):
+ FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+ adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+ /// uses an LSDA. If so, then we need to encode that information in the CIE's
+ /// augmentation.
+ DenseMap<unsigned, bool> UsesLSDA;
+
+ /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+ /// that is shared among many Frame Description Entries. There is at least
+ /// one CIE in every non-empty .debug_frame section.
+ void EmitCIE(const Function *Personality, unsigned Index);
+
+ /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+ void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfTableException(AsmPrinter *A);
+ virtual ~DwarfTableException();
/// EndModule - Emit all exception information that should come after the
/// content.
- void EndModule();
+ virtual void EndModule();
/// BeginFunction - Gather pre-function exception information. Assumes being
/// emitted immediately after the function entry point.
- void BeginFunction(const MachineFunction *MF);
+ virtual void BeginFunction(const MachineFunction *MF);
/// EndFunction - Gather and emit post-function exception information.
- void EndFunction();
+ virtual void EndFunction();
};
} // End of namespace llvm
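With DwarfException split into a virtual base and the DwarfCFIException / DwarfTableException subclasses above, the AsmPrinter side can pick the concrete emitter from the target's exception-handling style. The sketch below is a hedged illustration only: the enumerator names and the dispatch site are assumptions, and the real wiring lives in AsmPrinter.

    // Sketch only: pick the concrete emitter from the target's EH style.
    // ExceptionHandling::DwarfCFI / DwarfTable are assumed enumerator names.
    static DwarfException *createEHEmitter(AsmPrinter *AP) {
      switch (AP->MAI->getExceptionHandlingType()) {
      case ExceptionHandling::DwarfCFI:
        return new DwarfCFIException(AP);
      case ExceptionHandling::DwarfTable:
        return new DwarfTableException(AP);
      default:
        return 0; // other EH styles are outside this sketch
      }
    }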
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
new file mode 100644
index 0000000..7519011
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
@@ -0,0 +1,349 @@
+//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+// The implementation emits all of the necessary tables "by hand".
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfTableException::DwarfTableException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false),
+ shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+
+DwarfTableException::~DwarfTableException() {}
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
+/// is shared among many Frame Description Entries. There is at least one CIE
+/// in every non-empty .debug_frame section.
+void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth = Asm->getTargetData().getPointerSize();
+ if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown)
+ stackGrowth *= -1;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Begin eh frame section.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ MCSymbol *EHFrameSym;
+ if (TLOF.isFunctionEHFrameSymbolPrivate())
+ EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
+ else
+ EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
+ Twine(Index));
+ Asm->OutStreamer.EmitLabel(EHFrameSym);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
+
+ // Define base labels.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
+
+ // Define the eh frame length.
+ Asm->OutStreamer.AddComment("Length of Common Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
+ Asm->GetTempSymbol("eh_frame_common_begin", Index),
+ 4);
+
+ // EH frame header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
+ Asm->OutStreamer.AddComment("CIE Identifier Tag");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+ Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
+
+ // The personality presence indicates that language specific information will
+ // show up in the eh frame. Find out how we are supposed to lower the
+ // personality function reference:
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ char Augmentation[6] = { 0 };
+ unsigned AugmentationSize = 0;
+ char *APtr = Augmentation + 1;
+
+ if (PersonalityFn) {
+ // There is a personality function.
+ *APtr++ = 'P';
+ AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
+ }
+
+ if (UsesLSDA[Index]) {
+ // An LSDA pointer is in the FDE augmentation.
+ *APtr++ = 'L';
+ ++AugmentationSize;
+ }
+
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+ // A non-default pointer encoding for the FDE.
+ *APtr++ = 'R';
+ ++AugmentationSize;
+ }
+
+ if (APtr != Augmentation + 1)
+ Augmentation[0] = 'z';
+
+ Asm->OutStreamer.AddComment("CIE Augmentation");
+ Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
+
+ // Round out reader.
+ Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+ Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+ Asm->OutStreamer.AddComment("CIE Return Address Column");
+
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Augmentation[0]) {
+ Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
+
+ // If there is a personality, we need to indicate the function's location.
+ if (PersonalityFn) {
+ Asm->EmitEncodingByte(PerEncoding, "Personality");
+ Asm->OutStreamer.AddComment("Personality");
+ Asm->EmitReference(PersonalityFn, PerEncoding);
+ }
+ if (UsesLSDA[Index])
+ Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr)
+ Asm->EmitEncodingByte(FDEEncoding, "FDE");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ TFI->getInitialFrameState(Moves);
+ Asm->EmitFrameMoves(Moves, 0, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it must
+ // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+ // holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
+}
+
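EmitCIE above assembles the CIE augmentation string incrementally: a leading 'z' is written only if at least one of 'P' (personality), 'L' (LSDA encoding) or 'R' (FDE encoding) follows. A standalone sketch of just that string construction (plain C++, not the emitter itself):

    #include <cstring>

    // Sketch: build a "zPLR"-style augmentation string into a 6-byte buffer,
    // mirroring the flag logic in EmitCIE above.
    static void buildAugmentation(bool HasPersonality, bool UsesLSDA,
                                  bool NonDefaultFDEEncoding, char Out[6]) {
      std::memset(Out, 0, 6);
      char *P = Out + 1;
      if (HasPersonality)        *P++ = 'P';
      if (UsesLSDA)              *P++ = 'L';
      if (NonDefaultFDEEncoding) *P++ = 'R';
      if (P != Out + 1)
        Out[0] = 'z';            // only prefix 'z' when some augmentation exists
    }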
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+ "Should not emit 'available externally' functions at all");
+
+ const Function *TheFunc = EHFrameInfo.function;
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  // Externally visible entry into the function's eh frame info. If the
+ // corresponding function is static, this should not be externally visible.
+ if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
+
+ // If corresponding function is weak definition, this should be too.
+ if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_WeakDefinition);
+
+ // If corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ HiddenAttr);
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+ // info is to be available for non-EH uses.
+ if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
+ (!TheFunc->isWeakForLinker() ||
+ !Asm->MAI->getWeakDefDirective() ||
+ TLOF.getSupportsWeakOmittedEHFrame())) {
+ Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+ MCConstantExpr::Create(0, Asm->OutContext));
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ } else {
+ Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
+
+ // EH frame header.
+ Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+ EHFrameInfo.Number));
+
+ Asm->OutStreamer.AddComment("FDE CIE offset");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_common",
+ EHFrameInfo.PersonalityIndex), 4);
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
+
+ Asm->OutStreamer.AddComment("FDE initial location");
+ Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+
+ Asm->OutStreamer.AddComment("FDE address range");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+ EHFrameInfo.Number),
+ EHFuncBeginSym,
+ Asm->GetSizeOfEncodedValue(FDEEncoding));
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalities()[0] != NULL) {
+ unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
+
+ Asm->EmitULEB128(Size, "Augmentation size");
+ Asm->OutStreamer.AddComment("Language Specific Data Area");
+ if (EHFrameInfo.hasLandingPads)
+ Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+ LSDAEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
+
+ } else {
+ Asm->EmitULEB128(0, "Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in frame.
+ Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it
+ // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+ // get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+ EHFrameInfo.Number));
+
+ // If the function is marked used, this table should be also. We cannot
+ // make the mark unconditional in this case, since retaining the table also
+ // retains the function in this case, and there is code around that depends
+ // on unused functions (calling undefined externals) being dead-stripped to
+ // link correctly. Yes, there really is.
+ if (MMI->isUsedFunction(EHFrameInfo.function))
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ }
+ Asm->OutStreamer.AddBlankLine();
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfTableException::EndModule() {
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ if (!shouldEmitMovesModule && !shouldEmitTableModule)
+ return;
+
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+
+ for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+ EmitCIE(Personalities[I], I);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator
+ I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+ EmitFDE(*I);
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfTableException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitTable = shouldEmitMoves = false;
+
+ // If any landing pads survive, we need an EH table.
+ shouldEmitTable = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ shouldEmitMoves =
+ !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ shouldEmitTableModule |= shouldEmitTable;
+ shouldEmitMovesModule |= shouldEmitMoves;
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfTableException::EndFunction() {
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Record if this personality index uses a landing pad.
+ bool HasLandingPad = !MMI->getLandingPads().empty();
+ UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (HasLandingPad)
+ EmitExceptionTable();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *FunctionEHSym =
+ Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+ TLOF.isFunctionEHFrameSymbolPrivate());
+
+ // Save EH frame information
+ EHFrames.
+ push_back(FunctionEHFrameInfo(FunctionEHSym,
+ Asm->getFunctionNumber(),
+ MMI->getPersonalityIndex(),
+ Asm->MF->getFrameInfo()->adjustsStack(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ Asm->MF->getFunction()));
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index c8a63cf..1153817 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include <cctype>
using namespace llvm;
namespace {
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 1b7e08a..76bb3d1 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,12 @@
using namespace llvm;
char CalculateSpillWeights::ID = 0;
-INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
- "Calculate spill weights", false, false);
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
@@ -170,8 +174,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
totalWeight *= 0.5F;
}
- li.weight = totalWeight;
- lis_.normalizeSpillWeight(li);
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
@@ -218,7 +221,7 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
if (rc == orc)
return;
- DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
- << rc->getName() <<".\n");
+ DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg)
+ << " to " << rc->getName() <<".\n");
mri.setRegClass(reg, rc);
}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 62ad817..2ad80b4 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -34,8 +34,8 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign,
ISD::ArgFlagsTy ArgFlags) {
unsigned Align = ArgFlags.getByValAlign();
@@ -51,11 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- UsedRegs[Reg/32] |= 1 << (Reg&31);
-
- if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
- for (; (Reg = *RegAliases); ++RegAliases)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ unsigned Reg = *Alias; ++Alias)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
}
/// AnalyzeFormalArguments - Analyze an array of argument values,
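The MarkAllocated rewrite above walks TRI.getOverlaps(Reg), a zero-terminated list that (unlike the old alias set) is expected to include Reg itself, so the separate first store disappears. The bit-set update itself is the familiar word/bit split; a tiny sketch assuming the same zero-terminated list shape:

    #include <cstdint>
    #include <vector>

    // Sketch: set one bit per register number in a packed 32-bit word array,
    // i.e. the UsedRegs[Reg/32] |= 1 << (Reg&31) pattern used above.
    static void markRegs(std::vector<uint32_t> &UsedRegs, const unsigned *Overlaps) {
      for (unsigned Reg; (Reg = *Overlaps) != 0; ++Overlaps)
        UsedRegs[Reg / 32] |= 1u << (Reg & 31);
    }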
@@ -66,12 +64,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
unsigned NumArgs = Ins.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Ins[i].VT;
+ MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -84,7 +82,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
return false;
@@ -98,12 +96,12 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -116,12 +114,12 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].VT;
+ MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -130,17 +128,17 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
/// AnalyzeCallOperands - Same as above except it takes vectors of types
/// and argument flags.
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) {
unsigned NumOps = ArgVTs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = ArgVTs[i];
+ MVT ArgVT = ArgVTs[i];
ISD::ArgFlagsTy ArgFlags = Flags[i];
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -152,12 +150,12 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT VT = Ins[i].VT;
+ MVT VT = Ins[i].VT;
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -166,11 +164,11 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
-void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
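The CCState changes above switch the calling-convention interfaces from EVT to MVT (the plain enum of legal machine value types); for the diagnostic strings the MVT is wrapped back into an EVT, since getEVTString() lives there. A tiny hedged illustration of that wrapping (the helper name is invented):

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: printing an MVT goes through EVT, as in the dbgs() calls above.
    static void reportUnhandled(llvm::MVT VT, unsigned Index) {
      llvm::errs() << "operand #" << Index << " has unhandled type "
                   << llvm::EVT(VT).getEVTString() << '\n';
    }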
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 0000000..515e6f9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,61 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePreAllocSplittingPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRALinScanPass(Registry);
+ initializeRegisterCoalescerAnalysisGroup(Registry);
+ initializeRenderMachineFunctionPass(Registry);
+ initializeSimpleRegisterCoalescingPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeLoopSplitterPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
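
This new file gives the CodeGen library a single registration entry point. A hypothetical host tool that links the library would call it once before building its pass pipeline; a sketch using the C++ API shown above:

#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;

int main() {
  // Register every CodeGen pass with the global registry so that pass
  // dependencies declared via INITIALIZE_PASS_DEPENDENCY can be resolved.
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCodeGen(Registry);
  // ... set up a TargetMachine and add passes as usual ...
  return 0;
}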
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 335d2d8..f79598d 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -130,21 +130,25 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
- // Any register which was defined within the previous scheduling region
- // may have been rescheduled and its lifetime may overlap with registers
- // in ways not reflected in our current liveness state. For each such
- // register, adjust the liveness state to be conservatively correct.
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
- if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
- assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
-
- // Mark this register to be non-renamable.
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
}
+ }
PrescanInstruction(MI);
ScanInstruction(MI, Count);
@@ -177,7 +181,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// that have special allocation requirements. Also assume all registers
// used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
- // conservatively here because the kill markers cannot be trusted after
+ // conservative here because the kill markers cannot be trusted after
// if-conversion:
// %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
@@ -321,8 +325,62 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
}
}
+// Check all machine operands that reference the antidependent register and that
+// must be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
const TargetRegisterClass *RC)
@@ -338,10 +396,10 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
// an anti-dependence with this AntiDepReg, because that would
// re-introduce that anti-dependence.
if (NewReg == LastNewReg) continue;
- // If the instruction already has a def of the NewReg, it's not suitable.
- // For example, Instruction with multiple definitions can result in this
- // condition.
- if (MI->modifiesRegister(NewReg, TRI)) continue;
+ // If any instruction that defines AntiDepReg also defines NewReg, it's not
+ // suitable. For example, an instruction with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
// If NewReg is dead and NewReg's most recent def is not before
// AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
@@ -548,7 +606,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
- if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
LastNewReg[AntiDepReg],
RC)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
@@ -558,9 +620,6 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Update the references to the old register to refer to the new
// register.
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
- std::multimap<unsigned, MachineOperand *>::iterator>
- Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
@@ -580,7 +639,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
}
// We just went back in time and modified history; the
- // liveness information for the anti-depenence reg is now
+ // liveness information for the anti-dependence reg is now
// inconsistent. Set the state as if it were dead.
Classes[NewReg] = Classes[AntiDepReg];
DefIndices[NewReg] = DefIndices[AntiDepReg];
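
The rewritten findSuitableFreeRegister takes the [RegRefBegin, RegRefEnd) range produced by RegRefs.equal_range(AntiDepReg) up front, so the same set of references drives both the clobber check and the later operand rewrite. The multimap range idiom in isolation, with a hypothetical Operand stand-in for MachineOperand (a sketch, not the real data structures):

#include <cstdio>
#include <map>
#include <utility>

// Hypothetical stand-in for MachineOperand; only what the walk needs.
struct Operand { bool IsDef; };

typedef std::multimap<unsigned, Operand*>::const_iterator RegRefIter;

// Visit every recorded reference to Reg exactly once, the same way the
// breaker walks RegRefs.equal_range(AntiDepReg) in the hunk above.
static bool anyDefAmongRefs(const std::multimap<unsigned, Operand*> &RegRefs,
                            unsigned Reg) {
  std::pair<RegRefIter, RegRefIter> Range = RegRefs.equal_range(Reg);
  for (RegRefIter I = Range.first; I != Range.second; ++I)
    if (I->second->IsDef)
      return true;
  return false;
}

int main() {
  Operand Use = { false }, Def = { true };
  std::multimap<unsigned, Operand*> RegRefs;
  RegRefs.insert(std::make_pair(5u, &Use));
  RegRefs.insert(std::make_pair(5u, &Def));
  std::printf("%d\n", anyDefAmongRefs(RegRefs, 5)); // prints 1
  return 0;
}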
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index 0ed7c35..0daaef2 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -48,8 +48,10 @@ class TargetRegisterInfo;
/// pointer.
std::vector<const TargetRegisterClass*> Classes;
- /// RegRegs - Map registers to all their references within a live range.
+ /// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
@@ -90,10 +92,14 @@ class TargetRegisterInfo;
private:
void PrescanInstruction(MachineInstr *MI);
void ScanInstruction(MachineInstr *MI, unsigned Count);
- unsigned findSuitableFreeRegister(MachineInstr *MI,
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *);
+ const TargetRegisterClass *RC);
};
}
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 318d922..fdc1d91 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
private:
bool isDead(const MachineInstr *MI) const;
@@ -45,13 +47,19 @@ namespace {
char DeadMachineInstructionElim::ID = 0;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
- "Remove dead machine instructions", false, false);
+ "Remove dead machine instructions", false, false)
FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
}
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm with no side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there that we
+ // should let it be.
+ if (MI->isInlineAsm())
+ return false;
+
// Don't delete instructions with side effects.
bool SawStore = false;
if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
@@ -151,7 +159,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.reset(Reg);
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
@@ -168,7 +176,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isUse()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 550fd3e..0ebb5b0 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -43,7 +43,7 @@ namespace {
// The eh.selector intrinsic.
Function *SelectorIntrinsic;
- // _Unwind_Resume_or_Rethrow call.
+ // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
Constant *URoR;
// The EH language-specific catch-all type.
@@ -82,11 +82,11 @@ namespace {
/// FindAllURoRInvokes - Find all URoR invokes in the function.
void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
- /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
- /// calls. The "unwind" part of these invokes jump to a landing pad within
- /// the current function. This is a candidate to merge the selector
- /// associated with the URoR invoke with the one from the URoR's landing
- /// pad.
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+ /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
+ /// a landing pad within the current function. This is a candidate to merge
+ /// the selector associated with the URoR invoke with the one from the
+ /// URoR's landing pad.
bool HandleURoRInvokes();
/// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
@@ -100,7 +100,9 @@ namespace {
DwarfEHPrepare(const TargetMachine *tm) :
FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
- URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &Fn);
@@ -224,10 +226,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
return Changed;
}
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
-/// "unwind" part of these invokes jump to a landing pad within the current
-/// function. This is a candidate to merge the selector associated with the URoR
-/// invoke with the one from the URoR's landing pad.
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
+/// landing pad within the current function. This is a candidate to merge the
+/// selector associated with the URoR invoke with the one from the URoR's
+/// landing pad.
bool DwarfEHPrepare::HandleURoRInvokes() {
if (!EHCatchAllValue) {
EHCatchAllValue =
@@ -247,7 +250,10 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors(CatchAllSels);
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume");
+ if (!URoR) return CleanupSelectors(CatchAllSels);
+ }
}
SmallPtrSet<InvokeInst*, 32> URoRInvokes;
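
The fallback lookup added here only kicks in when the DWARF resume helper is not declared in the module. The pattern in isolation (a sketch; Module::getFunction returns null for undeclared functions, and the helper name below is illustrative):

#include "llvm/Module.h"
using namespace llvm;

// Return whichever resume-or-rethrow routine the module declares, or null.
static Function *findResumeHelper(Module &M) {
  if (Function *F = M.getFunction("_Unwind_Resume_or_Rethrow"))
    return F;
  return M.getFunction("_Unwind_SjLj_Resume");
}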
diff --git a/contrib/llvm/lib/CodeGen/ELF.h b/contrib/llvm/lib/CodeGen/ELF.h
index fb884c9..e08feeb 100644
--- a/contrib/llvm/lib/CodeGen/ELF.h
+++ b/contrib/llvm/lib/CodeGen/ELF.h
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Support/ELF.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class GlobalValue;
diff --git a/contrib/llvm/lib/CodeGen/ELFWriter.cpp b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
index d14728d..0fd1e8e 100644
--- a/contrib/llvm/lib/CodeGen/ELFWriter.cpp
+++ b/contrib/llvm/lib/CodeGen/ELFWriter.cpp
@@ -45,6 +45,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +65,7 @@ char ELFWriter::ID = 0;
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo())),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -327,6 +328,18 @@ void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
}
}
+/// HasCommonSymbols - True if this section holds common symbols; this is
+/// indicated in the ELF object file by a symbol with SHN_COMMON section
+/// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+ // FIXME: this is wrong, a common symbol can be in .data for example.
+ if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+ return true;
+
+ return false;
+}
+
+
// EmitGlobal - Choose the right section for global and emit it
void ELFWriter::EmitGlobal(const GlobalValue *GV) {
@@ -363,7 +376,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) {
unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
GblSym->Size = Size;
- if (S->HasCommonSymbols()) { // Symbol must go to a common section
+ if (HasCommonSymbols(*S)) { // Symbol must go to a common section
GblSym->SectionIdx = ELF::SHN_COMMON;
// A new linkonce section is created for each global in the
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 0000000..aed8bc9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,86 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->size());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+ return false;
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const std::string &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
+
+
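
runOnMachineFunction numbers the ingoing side of block n as 2n and the outgoing side as 2n+1, then unions the outgoing bundle of each block with the ingoing bundles of its successors. A toy version over a plain adjacency list, using a minimal union-find in place of LLVM's IntEqClasses (a sketch, not the real API):

#include <cstdio>
#include <vector>

// Minimal union-find standing in for llvm::IntEqClasses.
struct EqClasses {
  std::vector<unsigned> Parent;
  explicit EqClasses(unsigned N) : Parent(N) {
    for (unsigned i = 0; i != N; ++i) Parent[i] = i;
  }
  unsigned find(unsigned x) {
    while (Parent[x] != x) x = Parent[x] = Parent[Parent[x]];
    return x;
  }
  void join(unsigned a, unsigned b) { Parent[find(a)] = find(b); }
};

int main() {
  // CFG with three blocks: 0 -> 1, 0 -> 2, 1 -> 2.
  std::vector<std::vector<unsigned> > Succs(3);
  Succs[0].push_back(1); Succs[0].push_back(2); Succs[1].push_back(2);

  EqClasses EC(2 * Succs.size());
  for (unsigned BB = 0; BB != Succs.size(); ++BB) {
    unsigned OutE = 2 * BB + 1;                 // outgoing bundle of BB
    for (unsigned i = 0; i != Succs[BB].size(); ++i)
      EC.join(OutE, 2 * Succs[BB][i]);          // ingoing bundle of successor
  }
  // Out(0), In(1), Out(1) and In(2) all fall into one bundle.
  std::printf("%u %u %u %u\n", EC.find(1), EC.find(2), EC.find(3), EC.find(4));
  return 0;
}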
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 0000000..b5ec303
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Expand ISel Pseudo-instructions";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+ return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index 0f6e882..d757cf4 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -30,7 +30,6 @@ namespace {
raw_ostream &OS;
public:
- Printer() : FunctionPass(ID), OS(errs()) {}
explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
@@ -56,7 +55,7 @@ namespace {
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
- "Create Garbage Collector Module Metadata", false, false);
+ "Create Garbage Collector Module Metadata", false, false)
// -----------------------------------------------------------------------------
@@ -70,7 +69,9 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
GCModuleInfo::GCModuleInfo()
- : ImmutablePass(ID) {}
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
GCModuleInfo::~GCModuleInfo() {
clear();
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 719fa19..766c6ee 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -19,11 +19,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -123,6 +124,11 @@ GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
// -----------------------------------------------------------------------------
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
FunctionPass *llvm::createGCLoweringPass() {
return new LowerIntrinsics();
}
@@ -130,7 +136,9 @@ FunctionPass *llvm::createGCLoweringPass() {
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(ID) {}
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
@@ -139,6 +147,7 @@ const char *LowerIntrinsics::getPassName() const {
void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
}
/// doInitialization - If this module uses the GC intrinsics, find them now.
@@ -249,9 +258,16 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
if (NeedsDefaultLoweringPass(S))
MadeChange |= PerformDefaultLowering(F, S);
- if (NeedsCustomLoweringPass(S))
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
MadeChange |= S.performCustomLowering(F);
-
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
return MadeChange;
}
@@ -345,13 +361,15 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (FI->getStrategy().needsSafePoint(GC::PreCall))
- FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI,
- CI->getDebugLoc()));
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
- if (FI->getStrategy().needsSafePoint(GC::PostCall))
- FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI,
- CI->getDebugLoc()));
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
}
void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
@@ -364,12 +382,12 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
}
void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- assert(TRI && "TargetRegisterInfo not available!");
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
RE = FI->roots_end(); RI != RE; ++RI)
- RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
}
bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index 0ea30d7..db53b04 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -17,7 +17,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -26,6 +28,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -91,6 +94,8 @@ namespace {
/// ClobbersPred - True if BB could modify predicates (e.g. has
/// cmp, call, etc.)
/// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Extra cost for instructions that are slower when predicated.
/// BB - Corresponding MachineBasicBlock.
/// TrueBB / FalseBB- See AnalyzeBranch().
/// BrCond - Conditions for end of block conditional branches.
@@ -106,6 +111,8 @@ namespace {
bool CannotBeCopied : 1;
bool ClobbersPred : 1;
unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
MachineBasicBlock *BB;
MachineBasicBlock *TrueBB;
MachineBasicBlock *FalseBB;
@@ -115,7 +122,7 @@ namespace {
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
HasFallThrough(false), IsUnpredicable(false),
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- BB(0), TrueBB(0), FalseBB(0) {}
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
};
/// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -150,20 +157,31 @@ namespace {
const TargetLowering *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineLoopInfo *MLI;
bool MadeChange;
int FnNum;
public:
static char ID;
- IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "If Converter"; }
private:
bool ReverseBranchCondition(BBInfo &BBI);
- bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ float Prediction, float Confidence) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
- bool FalseBranch, unsigned &Dups) const;
+ bool FalseBranch, unsigned &Dups,
+ float Prediction, float Confidence) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
@@ -188,14 +206,21 @@ namespace {
bool IgnoreBr = false);
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
- bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
- return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ float Prediction, float Confidence) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction, Confidence);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
- MachineBasicBlock &FBB, unsigned FSize) const {
- return TSize > 0 && FSize > 0 &&
- TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ float Prediction, float Confidence) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction, Confidence);
}
// blockAlwaysFallThrough - Block ends without a terminator.
@@ -230,7 +255,9 @@ namespace {
char IfConverter::ID = 0;
}
-INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
@@ -238,6 +265,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
// Tail merge tend to expose more if-conversion opportunities.
@@ -431,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
/// predecessor) forms a valid simple shape for ifcvt. It also returns the
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
-bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ float Prediction, float Confidence) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -441,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
- !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize))
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction, Confidence))
return false;
Dups = TrueBBI.NonPredSize;
}
@@ -456,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
/// returns the number of instructions that the ifcvt would need to duplicate
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
- bool FalseBranch, unsigned &Dups) const {
+ bool FalseBranch, unsigned &Dups,
+ float Prediction, float Confidence) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -478,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
++Size;
}
}
- if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size))
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
+ Prediction, Confidence))
return false;
Dups = Size;
}
@@ -493,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-static
-MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
- const TargetInstrInfo *TII) {
- MachineBasicBlock::iterator I = BB->end();
- while (I != BB->begin()) {
- --I;
- if (!I->getDesc().isBranch())
- break;
- }
- return I;
-}
-
/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid diamond shape for ifcvt.
bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -533,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
(TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
return false;
- MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
- // Skip dbg_value instructions
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- while (TI != TIE && FI != FIE) {
+ while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- if (TI == TIE)
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- if (FI == FIE)
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIB->isIdenticalTo(FIB))
break;
++Dups1;
- ++TI;
- ++FI;
+ ++TIB;
+ ++FIB;
}
- TI = firstNonBranchInst(TrueBBI.BB, TII);
- FI = firstNonBranchInst(FalseBBI.BB, TII);
- MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
- // Skip dbg_value instructions at end of the bb's.
- while (TI != TIB && TI->isDebugValue())
- --TI;
- while (FI != FIB && FI->isDebugValue())
- --FI;
- while (TI != TIB && FI != FIB) {
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->getDesc().isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->getDesc().isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIB && TI->isDebugValue())
- --TI;
- if (TI == TIB)
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIB && FI->isDebugValue())
- --FI;
- if (FI == FIB)
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIE->isIdenticalTo(FIE))
break;
++Dups2;
- --TI;
- --FI;
+ --TIE;
+ --FIE;
}
return true;
@@ -627,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// Then scan all the instructions.
BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
I != E; ++I) {
@@ -641,9 +670,15 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
if (!isCondBr) {
- if (!isPredicated)
+ if (!isPredicated) {
BBI.NonPredSize++;
- else if (!AlreadyPredicated) {
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
// FIXME: This instruction is already predicated before the
// if-conversion pass. It's probably something like a conditional move.
// Mark this block unpredicable for now.
@@ -765,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
bool TNeedSub = TrueBBI.Predicate.size() > 0;
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
+
+ // Try to predict the branch, using loop info to guide us.
+ // General heuristics are:
+ // - backedge -> 90% taken
+ // - early exit -> 20% taken
+ // - branch predictor confidence -> 90%
+ float Prediction = 0.5f;
+ float Confidence = 0.9f;
+ MachineLoop *Loop = MLI->getLoopFor(BB);
+ if (Loop) {
+ if (TrueBBI.BB == Loop->getHeader())
+ Prediction = 0.9f;
+ else if (FalseBBI.BB == Loop->getHeader())
+ Prediction = 0.1f;
+
+ MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
+ MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
+ if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
+ Prediction = 0.2f;
+ else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
+ Prediction = 0.8f;
+ }
+
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
- *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
@@ -783,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
@@ -797,15 +859,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(TrueBBI, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
@@ -820,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
if (CanRevCond) {
// Try the other path...
- if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(FalseBBI, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
@@ -1365,6 +1437,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
if (!TII->PredicateInstruction(MI, Cond)) {
@@ -1438,7 +1515,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.HasFallThrough = FromBBI.HasFallThrough;
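
With these changes, profitability weighs instruction cycles, the extra predication cost, and a branch prediction estimate rather than a bare size limit. The real decision lives in the target's isProfitableToIfCvt; the toy arithmetic below only illustrates the trade-off with made-up numbers and a deliberately simplified cost model:

#include <cstdio>

int main() {
  // Invented figures, only to show the shape of the comparison.
  float Prediction = 0.9f;          // e.g. a loop backedge, ~90% taken
  float Confidence = 0.9f;          // trust in that prediction
  unsigned TCycle = 4, TExtra = 1;  // true-block cycles, predication penalty
  unsigned MispredictPenalty = 10;  // pipeline flush cost when wrong

  // Keeping the branch: pay the block when taken, plus occasional flushes.
  float BranchCost =
      Prediction * TCycle + (1.0f - Confidence) * MispredictPenalty;
  // If-converting: always pay the block plus the extra predication cost.
  float IfCvtCost = float(TCycle + TExtra);

  std::printf("branch=%.1f ifcvt=%.1f -> %s\n", BranchCost, IfCvtCost,
              IfCvtCost < BranchCost ? "if-convert" : "keep branch");
  return 0;
}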
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index b965bfd..a1bd972 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -12,28 +12,34 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "spiller"
+#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool>
+VerifySpills("verify-spills", cl::desc("Verify after each spill/split"));
+
namespace {
class InlineSpiller : public Spiller {
MachineFunctionPass &pass_;
MachineFunction &mf_;
LiveIntervals &lis_;
- MachineLoopInfo &loops_;
+ LiveStacks &lss_;
+ AliasAnalysis *aa_;
VirtRegMap &vrm_;
MachineFrameInfo &mfi_;
MachineRegisterInfo &mri_;
@@ -41,19 +47,12 @@ class InlineSpiller : public Spiller {
const TargetRegisterInfo &tri_;
const BitVector reserved_;
- SplitAnalysis splitAnalysis_;
-
// Variables that are valid during spill(), but used by multiple methods.
- LiveInterval *li_;
- SmallVectorImpl<LiveInterval*> *newIntervals_;
+ LiveRangeEdit *edit_;
const TargetRegisterClass *rc_;
int stackSlot_;
- const SmallVectorImpl<LiveInterval*> *spillIs_;
- // Values of the current interval that can potentially remat.
- SmallPtrSet<VNInfo*, 8> reMattable_;
-
- // Values in reMattable_ that failed to remat at some point.
+ // Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> usedValues_;
~InlineSpiller() {}
@@ -65,30 +64,29 @@ public:
: pass_(pass),
mf_(mf),
lis_(pass.getAnalysis<LiveIntervals>()),
- loops_(pass.getAnalysis<MachineLoopInfo>()),
+ lss_(pass.getAnalysis<LiveStacks>()),
+ aa_(&pass.getAnalysis<AliasAnalysis>()),
vrm_(vrm),
mfi_(*mf.getFrameInfo()),
mri_(mf.getRegInfo()),
tii_(*mf.getTarget().getInstrInfo()),
tri_(*mf.getTarget().getRegisterInfo()),
- reserved_(tri_.getReservedRegs(mf_)),
- splitAnalysis_(mf, lis_, loops_) {}
+ reserved_(tri_.getReservedRegs(mf_)) {}
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs);
+ const SmallVectorImpl<LiveInterval*> &spillIs);
-private:
- bool split();
+ void spill(LiveRangeEdit &);
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx);
+private:
bool reMaterializeFor(MachineBasicBlock::iterator MI);
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI);
bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops);
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI = 0);
void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
};
@@ -98,106 +96,41 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
+ if (VerifySpills)
+ mf.verify(&pass, "When creating inline spiller");
return new InlineSpiller(pass, mf, vrm);
}
}
-/// split - try splitting the current interval into pieces that may allocate
-/// separately. Return true if successful.
-bool InlineSpiller::split() {
- splitAnalysis_.analyze(li_);
-
- if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
- // We can split, but li_ may be left intact with fewer uses.
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitAroundLoop(loop))
- return true;
- }
-
- // Try splitting into single block intervals.
- SplitAnalysis::BlockPtrSet blocks;
- if (splitAnalysis_.getMultiUseBlocks(blocks)) {
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitSingleBlocks(blocks))
- return true;
- }
-
- // Try splitting inside a basic block.
- if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) {
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitInsideBlock(MBB))
- return true;
- }
-
- // We may have been able to split out some uses, but the original interval is
- // intact, and it should still be spilled.
- return false;
-}
-
-/// allUsesAvailableAt - Return true if all registers used by OrigMI at
-/// OrigIdx are also available with the same value at UseIdx.
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
- SlotIndex OrigIdx,
- SlotIndex UseIdx) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
- if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
- continue;
- // Reserved registers are OK.
- if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
- continue;
- // We don't want to move any defs.
- if (MO.isDef())
- return false;
- // We cannot depend on virtual registers in spillIs_. They will be spilled.
- for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
- if ((*spillIs_)[si]->reg == MO.getReg())
- return false;
-
- LiveInterval &LI = lis_.getInterval(MO.getReg());
- const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
- if (!OVNI)
- continue;
- if (OVNI != LI.getVNInfoAt(UseIdx))
- return false;
- }
- return true;
-}
-
-/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of
/// reloading it.
bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
- VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+ VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
+
if (!OrigVNI) {
DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg())
MO.setIsUndef();
}
DEBUG(dbgs() << UseIdx << '\t' << *MI);
return true;
}
- if (!reMattable_.count(OrigVNI)) {
- DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
- << UseIdx << '\t' << *MI);
- return false;
- }
- MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
- if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+
+ LiveRangeEdit::Remat RM(OrigVNI);
+ if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) {
usedValues_.insert(OrigVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
}
- // If the instruction also writes li_->reg, it had better not require the same
- // register for uses and defs.
+ // If the instruction also writes edit_->getReg(), it had better not require
+ // the same register for uses and defs.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops);
if (Writes) {
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
@@ -209,62 +142,57 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
}
}
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ edit_->markRematerialized(RM.ParentVNI);
+ return true;
+ }
+
+ // Allocate a new register for the remat.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_);
NewLI.markNotSpillable();
- newIntervals_->push_back(&NewLI);
+
+ // Rematting for a copy: Set allocation hint to be the destination register.
+ if (MI->isCopy())
+ mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg());
// Finally we can rematerialize OrigMI before MI.
- MachineBasicBlock &MBB = *MI->getParent();
- tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
- MachineBasicBlock::iterator RematMI = MI;
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
- DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+ SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ lis_, tii_, tri_);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *lis_.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
- MO.setReg(NewVReg);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) {
+ MO.setReg(NewLI.reg);
MO.setIsKill();
}
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
return true;
}
-/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
// Do a quick scan of the interval values to find if any are remattable.
- reMattable_.clear();
- usedValues_.clear();
- for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
- E = li_->vni_end(); I != E; ++I) {
- VNInfo *VNI = *I;
- if (VNI->isUnused() || !VNI->isDefAccurate())
- continue;
- MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
- if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
- continue;
- reMattable_.insert(VNI);
- }
-
- // Often, no defs are remattable.
- if (reMattable_.empty())
+ if (!edit_->anyRematerializable(lis_, tii_, aa_))
return;
- // Try to remat before all uses of li_->reg.
+ usedValues_.clear();
+
+ // Try to remat before all uses of edit_->getReg().
bool anyRemat = false;
for (MachineRegisterInfo::use_nodbg_iterator
- RI = mri_.use_nodbg_begin(li_->reg);
+ RI = mri_.use_nodbg_begin(edit_->getReg());
MachineInstr *MI = RI.skipInstruction();)
anyRemat |= reMaterializeFor(MI);
@@ -273,33 +201,35 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
bool anyRemoved = false;
- for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
- E = reMattable_.end(); I != E; ++I) {
+ for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(),
+ E = edit_->getParent().vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
- if (VNI->hasPHIKill() || usedValues_.count(VNI))
+ if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) ||
+ usedValues_.count(VNI))
continue;
MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
lis_.RemoveMachineInstrFromMaps(DefMI);
vrm_.RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
- VNI->setIsDefAccurate(false);
+ VNI->def = SlotIndex();
anyRemoved = true;
}
if (!anyRemoved)
return;
- // Removing values may cause debug uses where li_ is not live.
- for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
+ // Removing values may cause debug uses where parent is not live.
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg());
MachineInstr *MI = RI.skipInstruction();) {
if (!MI->isDebugValue())
continue;
- // Try to preserve the debug value if li_ is live immediately after it.
+ // Try to preserve the debug value if parent is live immediately after it.
MachineBasicBlock::iterator NextMI = MI;
++NextMI;
if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
- VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
+ SlotIndex Idx = lis_.getInstructionIndex(NextMI);
+ VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
continue;
}
@@ -317,7 +247,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
return false;
// We have a stack access. Is it the right register and slot?
- if (reg != li_->reg || FI != stackSlot_)
+ if (reg != edit_->getReg() || FI != stackSlot_)
return false;
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
@@ -327,9 +257,13 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
}
/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// Return true on success, and MI will be erased.
+/// @param MI Instruction using or defining the current register.
+/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success, and MI will be erased.
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops) {
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) {
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
@@ -341,16 +275,22 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
// Tied use operands should not be passed to foldMemoryOperand.
if (!MI->isRegTiedToDefOperand(Idx))
FoldOps.push_back(Idx);
}
- MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+ MachineInstr *FoldMI =
+ LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
if (!FoldMI)
return false;
lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
- vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+ if (!LoadMI)
+ vrm_.addSpillSlotUse(stackSlot_, FoldMI);
MI->eraseFromParent();
DEBUG(dbgs() << "\tfolded: " << *FoldMI);
return true;
@@ -366,7 +306,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
vrm_.addSpillSlotUse(stackSlot_, MI);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
}
@@ -375,44 +315,58 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
void InlineSpiller::insertSpill(LiveInterval &NewLI,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
+
+  // Get the defined value. It could be an early clobber, so keep the def index.
SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
+ assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo");
+ Idx = VNI->def;
+
tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
--MI; // Point to store instruction.
SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
vrm_.addSpillSlotUse(stackSlot_, MI);
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
}
void InlineSpiller::spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
- DEBUG(dbgs() << "Inline spilling " << *li << "\n");
- assert(li->isSpillable() && "Attempting to spill already spilled value.");
- assert(!li->isStackSlot() && "Trying to spill a stack slot.");
-
- li_ = li;
- newIntervals_ = &newIntervals;
- rc_ = mri_.getRegClass(li->reg);
- spillIs_ = &spillIs;
+ const SmallVectorImpl<LiveInterval*> &spillIs) {
+ LiveRangeEdit edit(*li, newIntervals, spillIs);
+ spill(edit);
+ if (VerifySpills)
+ mf_.verify(&pass_, "After inline spill");
+}
- if (split())
- return;
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ edit_ = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ DEBUG(dbgs() << "Inline spilling "
+ << mri_.getRegClass(edit.getReg())->getName()
+ << ':' << edit.getParent() << "\n");
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
reMaterializeAll();
// Remat may handle everything.
- if (li_->empty())
+ if (edit_->getParent().empty())
return;
- stackSlot_ = vrm_.getStackSlot(li->reg);
- if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
- stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+ rc_ = mri_.getRegClass(edit.getReg());
+ stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg());
+
+ // Update LiveStacks now that we are committed to spilling.
+ LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_);
+ assert(stacklvr.empty() && "Just created stack slot not empty");
+ stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
+ stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0));
// Iterate over instructions using register.
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg());
MachineInstr *MI = RI.skipInstruction();) {
// Debug values are not allowed to affect codegen.
@@ -440,7 +394,7 @@ void InlineSpiller::spill(LiveInterval *li,
// Analyze instruction.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops);
// Attempt to fold memory ops.
if (foldMemoryOperand(MI, Ops))
@@ -448,9 +402,7 @@ void InlineSpiller::spill(LiveInterval *li,
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = edit.create(mri_, lis_, vrm_);
NewLI.markNotSpillable();
if (Reads)
@@ -460,7 +412,7 @@ void InlineSpiller::spill(LiveInterval *li,
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- MO.setReg(NewVReg);
+ MO.setReg(NewLI.reg);
if (MO.isUse()) {
if (!MI->isRegTiedToDefOperand(Ops[i]))
MO.setIsKill();
@@ -475,6 +427,5 @@ void InlineSpiller::spill(LiveInterval *li,
insertSpill(NewLI, MI);
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
- newIntervals.push_back(&NewLI);
}
}
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 3852eba..3861dda 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -85,9 +85,11 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
}
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
void IntrinsicLowering::AddPrototypes(Module &M) {
@@ -536,3 +538,27 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
"Lowering should have eliminated any uses of the intrinsic call!");
CI->eraseFromParent();
}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
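The LowerToByteSwap() hunk above turns a matching call into the llvm.bswap intrinsic, which reverses the byte order of an integer value. As a minimal standalone sketch of the operation itself (plain C++, not part of the patch, names chosen for illustration):

#include <cstdint>
#include <cstdio>

// Reverse the byte order of a 32-bit value, the effect of llvm.bswap.i32.
static uint32_t bswap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
         ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main() {
  std::printf("%08x\n", bswap32(0x11223344u)); // prints 44332211
  return 0;
}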
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 3603802..80dfc76 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -20,9 +20,11 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -30,6 +32,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/StandardPasses.h"
using namespace llvm;
namespace llvm {
@@ -140,13 +143,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
- if (ShowMCEncoding)
+ TargetAsmBackend *TAB = 0;
+ if (ShowMCEncoding) {
MCE = getTarget().createCodeEmitter(*this, *Context);
-
- AsmStreamer.reset(createAsmStreamer(*Context, Out,
- getTargetData()->isLittleEndian(),
- getVerboseAsm(), InstPrinter,
- MCE, ShowMCInst));
+ TAB = getTarget().createAsmBackend(TargetTriple);
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ InstPrinter,
+ MCE, TAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
break;
}
case CGFT_ObjectFile: {
@@ -159,7 +168,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
*TAB, Out, MCE,
- hasMCRelaxAll()));
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
break;
}
case CGFT_Null:
@@ -241,7 +252,7 @@ static void printAndVerify(PassManagerBase &PM,
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass());
+ PM.add(createMachineVerifierPass(Banner));
}
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
@@ -253,6 +264,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
MCContext *&OutContext) {
// Standard LLVM-Level Passes.
+ // Basic AliasAnalysis support.
+ createStandardAliasAnalysisPasses(&PM);
+
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
@@ -288,7 +302,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
// FALLTHROUGH
- case ExceptionHandling::Dwarf:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::DwarfTable:
PM.add(createDwarfEHPass(this));
break;
case ExceptionHandling::None:
@@ -320,7 +335,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+ TargetAsmInfo *TAI = new TargetAsmInfo(*this);
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
@@ -339,6 +355,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Print the instruction selected machine code...
printAndVerify(PM, "After Instruction Selection");
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
@@ -356,13 +375,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createDeadMachineInstructionElimPass());
printAndVerify(PM, "After codegen DCE pass");
- PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
}
// Pre-ra tail duplication.
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index b9527fa..0eb009d 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
@@ -35,14 +36,14 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
unsigned RHSLatency = PQ->getLatency(RHSNum);
if (LHSLatency < RHSLatency) return true;
if (LHSLatency > RHSLatency) return false;
-
+
// After that, if two nodes have identical latencies, look to see if one will
// unblock more other nodes than the other.
unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
if (LHSBlocked < RHSBlocked) return true;
if (LHSBlocked > RHSBlocked) return false;
-
+
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
return LHSNum < RHSNum;
@@ -64,7 +65,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
OnlyAvailablePred = &Pred;
}
}
-
+
return OnlyAvailablePred;
}
@@ -78,7 +79,7 @@ void LatencyPriorityQueue::push(SUnit *SU) {
++NumNodesBlocking;
}
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
-
+
Queue.push_back(SU);
}
@@ -102,10 +103,10 @@ void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
/// node of the same priority that will not make a node available.
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
-
+
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
-
+
// Okay, we found a single predecessor that is available, but not scheduled.
// Since it is available, it must be in the priority queue. First remove it.
remove(OnlyAvailablePred);
@@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
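The new dump() above deliberately copies the queue and pops from the copy, leaving the scheduler's real state untouched. A standalone sketch of the same copy-then-drain idiom using std::priority_queue (illustrative only, not an LLVM API):

#include <cstdio>
#include <queue>

// Print a priority queue without disturbing it: drain a copy instead.
static void dumpQueue(const std::priority_queue<int> &q) {
  std::priority_queue<int> copy = q; // cheap enough for debugging output
  while (!copy.empty()) {
    std::printf("%d ", copy.top());
    copy.pop();
  }
  std::printf("\n");
}

int main() {
  std::priority_queue<int> q;
  const int vals[] = {3, 1, 4, 1, 5};
  for (int v : vals)
    q.push(v);
  dumpQueue(q); // 5 4 3 1 1
  dumpQueue(q); // queue unchanged, prints the same again
  return 0;
}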
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 0000000..853ec1a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,711 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx.
+ void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+  /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg() && LocMO.getReg() == 0)
+ return ~0u;
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ return locations.size() - 1;
+ }
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+  /// findDebugLoc - Return the DebugLoc to use for this DBG_VALUE instruction.
+  /// A variable may have more than one corresponding DBG_VALUE instruction;
+  /// only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
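The leader/next fields of UserValue implement a small union-find: getLeader() chases and compresses leader pointers, and merge() splices one equivalence class's member list in front of the other's. A standalone sketch of the same pattern, using a hypothetical Node type rather than the class above (and requiring non-null arguments, unlike UserValue::merge):

#include <cassert>

struct Node {
  Node *leader = this;  // equivalence class representative
  Node *next = nullptr; // next member of the same class

  Node *getLeader() {
    Node *l = leader;
    while (l != l->leader)
      l = l->leader;
    return leader = l; // path compression
  }
};

// Merge the classes of A and B; returns the surviving leader.
static Node *merge(Node *A, Node *B) {
  A = A->getLeader();
  B = B->getLeader();
  if (A == B)
    return A;
  // Splice B's members in front of A's, repointing leaders as we go.
  Node *End = B;
  while (End->next) {
    End->leader = A;
    End = End->next;
  }
  End->leader = A;
  End->next = A->next;
  A->next = B;
  return A;
}

int main() {
  Node a, b, c;
  merge(&a, &b);
  merge(&b, &c);
  assert(a.getLeader() == b.getLeader() && b.getLeader() == c.getLeader());
  return 0;
}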
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+  /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ OS << " Loc" << i << '=' << locations[i];
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
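coalesceLocation() above erases a duplicate location and then renumbers every stored location index: references to the erased slot are redirected to the kept one, and higher indices shift down by one. A standalone sketch of that renumbering step with plain containers (values and names are illustrative, LocMap is not involved):

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> locations = {"eax", "ebx", "eax"}; // Loc2 duplicates Loc0
  std::vector<unsigned> uses = {0, 2, 1, 2};                  // references by index

  unsigned KeepLoc = 0, EraseLoc = 2;
  locations.erase(locations.begin() + EraseLoc);

  // Redirect references to the erased index and shift higher indices down.
  for (unsigned &u : uses) {
    if (u == EraseLoc)
      u = KeepLoc;
    else if (u > EraseLoc)
      --u;
  }

  assert((uses == std::vector<unsigned>{0, 0, 1, 0}));
  return 0;
}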
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+
+ // If the location is a virtual register, make sure it is mapped.
+ if (MI->getOperand(0).isReg()) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ mapVirtReg(Reg, UV);
+ }
+
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI)
+ continue;
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+ } while (!Todo.empty());
+}
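Rather than recursing, extendDef() keeps a worklist of block start indices and pushes dominator-tree children whenever a location survives to the end of a block. A standalone sketch of that worklist pattern on a toy block tree (hypothetical Block type, not the LLVM classes):

#include <cstdio>
#include <vector>

struct Block {
  int id;
  std::vector<Block *> children; // dominator-tree children in the real pass
};

// Visit every block reachable from Start through the tree, iteratively.
static void extend(Block *Start, std::vector<int> &Visited) {
  std::vector<Block *> Todo;
  Todo.push_back(Start);
  while (!Todo.empty()) {
    Block *B = Todo.back();
    Todo.pop_back();
    Visited.push_back(B->id);
    for (Block *C : B->children)
      Todo.push_back(C);
  }
}

int main() {
  Block c{2, {}}, d{3, {}}, b{1, {&c, &d}}, a{0, {&b}};
  std::vector<int> visited;
  extend(&a, visited);
  for (int id : visited)
    std::printf("%d ", id); // 0 1 3 2 (LIFO worklist order)
  std::printf("\n");
  return 0;
}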
+
+void
+UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ // Register locations are constrained to where the register value is live.
+ if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+ LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ extendDef(Idx, LocNo, LI, VNI, LIS, MDT);
+ } else
+ extendDef(Idx, LocNo, 0, 0, LIS, MDT);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->computeIntervals(*LIS, *MDT);
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << ((Value*)mf.getFunction())->getName()
+ << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = locations.size(); i; --i) {
+ unsigned LocNo = i - 1;
+ MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg() || Loc.getReg() != OldReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ Loc.substPhysReg(NewReg, *TRI);
+ else
+ Loc.substVirtReg(NewReg, SubIdx, *TRI);
+ coalesceLocation(LocNo);
+ }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ UserValue *UV = lookupVirtReg(OldReg);
+ if (!UV)
+ return;
+
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ mapVirtReg(NewReg, UV);
+ virtRegToEqClass.erase(OldReg);
+
+ do {
+ UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+ UV = UV->getNext();
+ } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+ VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+ DEBUG(print(dbgs(), &TRI));
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0)
+ .addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ if (Stop == MBBEnd)
+ continue;
+ // The current interval ends before MBB.
+ // Insert a kill if there is a gap.
+ if (!I.valid() || I.start() > Stop)
+ insertDebugKill(MBB, Stop, LIS, TII);
+ }
+}
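The emitDebugValues() loop above re-emits one DBG_VALUE per basic block that a live interval crosses, advancing block by block until the interval's stop index is reached. A standalone sketch of the same splitting loop with plain integers standing in for slot indices and block end indices (illustrative only):

#include <cstdio>
#include <vector>

int main() {
  // Half-open live interval [Start, Stop) and the end index of each block.
  unsigned Start = 5, Stop = 42;
  std::vector<unsigned> BlockEnds = {10, 20, 30, 40, 50};

  // Find the block containing Start, then emit one entry per crossed block.
  unsigned MBB = 0;
  while (BlockEnds[MBB] <= Start)
    ++MBB;
  unsigned MBBEnd = BlockEnds[MBB];
  std::printf("value live in BB#%u from %u\n", MBB, Start);
  while (Stop > MBBEnd) {
    Start = MBBEnd;
    if (++MBB == BlockEnds.size())
      break;
    MBBEnd = BlockEnds[MBB];
    std::printf("value live in BB#%u from %u\n", MBB, Start);
  }
  return 0;
}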
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 0000000..a6e40a1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,63 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
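The header above hides all pass state behind a type-erased void *pImpl, so clients of LiveDebugVariables never see LDVImpl or its headers. A standalone sketch of that pattern with hypothetical Widget/WidgetImpl names (not the LLVM classes):

#include <cstdio>

namespace {
struct WidgetImpl {
  int counter = 0;
  void poke() { std::printf("poked %d times\n", ++counter); }
};
} // namespace

class Widget {
  void *pImpl = nullptr; // opaque; clients never need WidgetImpl's definition
public:
  void poke() {
    if (!pImpl)
      pImpl = new WidgetImpl(); // created lazily, like LDVImpl
    static_cast<WidgetImpl *>(pImpl)->poke();
  }
  ~Widget() { delete static_cast<WidgetImpl *>(pImpl); }
};

int main() {
  Widget w;
  w.poke(); // poked 1 times
  w.poke(); // poked 2 times
  return 0;
}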
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 59f380a..c2dbd6a 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -30,58 +30,19 @@
#include <algorithm>
using namespace llvm;
-// An example for liveAt():
-//
-// this = [1,4), liveAt(0) will return false. The instruction defining this
-// spans slots [0,3]. The interval belongs to an spilled definition of the
-// variable it represents. This is because slot 1 is used (def slot) and spans
-// up to slot 3 (store slot).
-//
-bool LiveInterval::liveAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- return r->contains(I);
-}
-
-// liveBeforeAndAt - Check if the interval is live at the index and the index
-// just before it. If index is liveAt, check if it starts a new live range.
-// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- if (!r->contains(I))
- return false;
- if (I != r->start)
- return true;
- // I is the start of a live range. Check if the previous live range ends
- // at I-1.
- if (r == ranges.begin())
- return false;
- return r->end == I;
+// CompEnd - Compare LiveRange ends.
+namespace {
+struct CompEnd {
+ bool operator()(const LiveRange &A, const LiveRange &B) const {
+ return A.end < B.end;
+ }
+};
}
-/// killedAt - Return true if a live range ends at index. Note that the kill
-/// point is not contained in the half-open live range. It is usually the
-/// getDefIndex() slot following its last use.
-bool LiveInterval::killedAt(SlotIndex I) const {
- Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
-
- // Now r points to the first interval with start >= I, or ranges.end().
- if (r == ranges.begin())
- return false;
-
- --r;
- // Now r points to the last interval with end <= I.
- // r->end is the kill point.
- return r->end == I;
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ assert(Pos.isValid() && "Cannot search for an invalid index");
+ return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0),
+ CompEnd());
}
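The new LiveInterval::find() replaces several hand-rolled upper_bound searches: ranges are ordered by their end index, so the first range whose end is greater than Pos is the only one that can contain Pos. A standalone sketch of that search over half-open [start, end) pairs (plain std types, illustrative only):

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

using Range = std::pair<unsigned, unsigned>; // half-open [start, end)

// First range whose end is greater than Pos; the caller then checks start <= Pos.
static std::vector<Range>::const_iterator
findRange(const std::vector<Range> &Ranges, unsigned Pos) {
  return std::upper_bound(Ranges.begin(), Ranges.end(), Pos,
                          [](unsigned P, const Range &R) { return P < R.second; });
}

int main() {
  std::vector<Range> Ranges = {{0, 4}, {10, 14}, {20, 30}};
  auto I = findRange(Ranges, 12);
  assert(I != Ranges.end() && I->first <= 12 && 12 < I->second); // inside [10,14)
  I = findRange(Ranges, 5);
  assert(I != Ranges.end() && I->first > 5); // in a gap: not live at 5
  return 0;
}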
/// killedInRange - Return true if the interval has kills in [Start,End).
@@ -330,25 +291,14 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in
-/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- if (I == ranges.begin())
- return false;
- --I;
- return I->containsRange(Start, End);
-}
-
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be in a single LiveRange in its entirety.
void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo) {
// Find the LiveRange containing this span.
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- assert(I != ranges.begin() && "Range is not in interval!");
- --I;
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
// If the span we are removing is at the start of the LiveRange, adjust it.
@@ -405,32 +355,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
markValNoForDeletion(ValNo);
}
-/// getLiveRangeContaining - Return the live range that contains the
-/// specified index, or null if there is none.
-LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
- const_iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != ranges.begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
-LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
- iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
/// findDefinedVNInfo - Find the VNInfo defined by the specified
/// index (register interval).
VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
@@ -443,17 +367,6 @@ VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
return 0;
}
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// register (stack inteval).
-VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
- for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i) {
- if ((*i)->getReg() == reg)
- return *i;
- }
- return 0;
-}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
@@ -616,103 +529,6 @@ void LiveInterval::MergeValueInAsValue(
}
-/// MergeInClobberRanges - For any live ranges that are not defined in the
-/// current interval, but are defined in the Clobbers interval, mark them
-/// used with an unknown definition value.
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
- const LiveInterval &Clobbers,
- VNInfo::Allocator &VNInfoAllocator) {
- if (Clobbers.empty()) return;
-
- DenseMap<VNInfo*, VNInfo*> ValNoMaps;
- VNInfo *UnusedValNo = 0;
- iterator IP = begin();
- for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
- // For every val# in the Clobbers interval, create a new "unknown" val#.
- VNInfo *ClobberValNo = 0;
- DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
- if (VI != ValNoMaps.end())
- ClobberValNo = VI->second;
- else if (UnusedValNo)
- ClobberValNo = UnusedValNo;
- else {
- UnusedValNo = ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
- ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
- }
-
- bool Done = false;
- SlotIndex Start = I->start, End = I->end;
- // If a clobber range starts before an existing range and ends after
- // it, the clobber range will need to be split into multiple ranges.
- // Loop until the entire clobber range is handled.
- while (!Done) {
- Done = true;
- IP = std::upper_bound(IP, end(), Start);
- SlotIndex SubRangeStart = Start;
- SlotIndex SubRangeEnd = End;
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > SubRangeStart) {
- SubRangeStart = IP[-1].end;
- // Trimmed away the whole range?
- if (SubRangeStart >= SubRangeEnd) continue;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && SubRangeEnd > IP->start) {
- // If the clobber live range extends beyond the existing live range,
- // it'll need at least another live range, so set the flag to keep
- // iterating.
- if (SubRangeEnd > IP->end) {
- Start = IP->end;
- Done = false;
- }
- SubRangeEnd = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (SubRangeStart == SubRangeEnd) continue;
- }
-
- // Insert the clobber interval.
- IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
- IP);
- UnusedValNo = 0;
- }
- }
-
- if (UnusedValNo) {
- // Delete the last unused val#.
- valnos.pop_back();
- }
-}
-
-void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
- SlotIndex Start,
- SlotIndex End,
- VNInfo::Allocator &VNInfoAllocator) {
- // Find a value # to use for the clobber ranges. If there is already a value#
- // for unknown values, use it.
- VNInfo *ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
- iterator IP = begin();
- IP = std::upper_bound(IP, end(), Start);
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > Start) {
- Start = IP[-1].end;
- // Trimmed away the whole range?
- if (Start >= End) return;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && End > IP->start) {
- End = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (Start == End) return;
- }
-
- // Insert the clobber interval.
- addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
-}
/// MergeValueNumberInto - This method is called when two value numbers
/// are found to be equivalent. This eliminates V1, replacing all
@@ -767,6 +583,9 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
+ // Merge the relevant flags.
+ V2->mergeFlags(V1);
+
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -831,14 +650,9 @@ void LiveRange::dump() const {
}
void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- if (isStackSlot())
- OS << "SS#" << getStackSlotIndex();
- else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
- OS << TRI->getName(reg);
- else
- OS << "%reg" << reg;
-
- OS << ',' << weight;
+ OS << PrintReg(reg, TRI);
+ if (weight != 0)
+ OS << ',' << weight;
if (empty())
OS << " EMPTY";
@@ -863,10 +677,9 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
if (vni->isUnused()) {
OS << "x";
} else {
- if (!vni->isDefAccurate() && !vni->isPHIDef())
- OS << "?";
- else
- OS << vni->def;
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
if (vni->hasRedefByEC())
@@ -884,3 +697,84 @@ void LiveInterval::dump() const {
void LiveRange::print(raw_ostream &os) const {
os << *this;
}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ eqClass_.clear();
+ eqClass_.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ eqClass_.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI =
+ LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot()))
+ eqClass_.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ eqClass_.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ eqClass_.join(used->id, unused->id);
+
+ eqClass_.compress();
+ return eqClass_.getNumClasses();
+}
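Classify() above joins value numbers pairwise and then compresses the result into dense class ids, relying on an integer equivalence-class container. A standalone sketch of the join/compress idea with a plain array-based union-find (illustrative, not LLVM's IntEqClasses API):

#include <cassert>
#include <numeric>
#include <vector>

struct EqClasses {
  std::vector<unsigned> parent;
  explicit EqClasses(unsigned n) : parent(n) {
    std::iota(parent.begin(), parent.end(), 0u);
  }
  unsigned find(unsigned x) {
    while (parent[x] != x)
      x = parent[x] = parent[parent[x]]; // path halving
    return x;
  }
  void join(unsigned a, unsigned b) { parent[find(a)] = find(b); }
  // Renumber representatives into dense class ids 0..numClasses-1.
  std::vector<unsigned> compress() {
    std::vector<unsigned> cls(parent.size());
    std::vector<int> id(parent.size(), -1);
    unsigned next = 0;
    for (unsigned i = 0; i != parent.size(); ++i) {
      unsigned r = find(i);
      if (id[r] < 0)
        id[r] = next++;
      cls[i] = id[r];
    }
    return cls;
  }
};

int main() {
  EqClasses EC(5);
  EC.join(0, 3);
  EC.join(1, 4);
  std::vector<unsigned> cls = EC.compress();
  assert(cls[0] == cls[3] && cls[1] == cls[4] && cls[0] != cls[1]);
  return 0;
}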
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // First move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && eqClass_[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = eqClass_[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && eqClass_[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = eqClass_[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2726fc3..aef5b5f 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,8 +56,17 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
-INITIALIZE_PASS(LiveIntervals, "liveintervals",
- "Live Interval Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -132,19 +142,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
void LiveIntervals::printInstrs(raw_ostream &OS) const {
OS << "********** MACHINEINSTRS **********\n";
-
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
- mbbi != mbbe; ++mbbi) {
- OS << "BB#" << mbbi->getNumber()
- << ":\t\t# derived from " << mbbi->getName() << "\n";
- for (MachineBasicBlock::iterator mii = mbbi->begin(),
- mie = mbbi->end(); mii != mie; ++mii) {
- if (mii->isDebugValue())
- OS << " \t" << *mii;
- else
- OS << getInstructionIndex(mii) << '\t' << *mii;
- }
- }
+ mf_->print(OS, indexes_);
}
void LiveIntervals::dumpInstrs() const {
@@ -248,15 +246,6 @@ bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
return false;
}
-#ifndef NDEBUG
-static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
- if (TargetRegisterInfo::isPhysicalRegister(reg))
- dbgs() << tri_->getName(reg);
- else
- dbgs() << "%reg" << reg;
-}
-#endif
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -285,8 +274,8 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
SlotIndex RedefIndex = MIIdx.getDefIndex();
const LiveRange *OldLR =
interval.getLiveRangeContaining(RedefIndex.getUseIndex());
- if (OldLR->valno->isDefAccurate()) {
- MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
}
return false;
@@ -298,10 +287,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
@@ -326,8 +312,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
CopyMI = mi;
}
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
- VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -393,8 +378,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
- ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
- VNInfoAllocator);
+ assert(getInstructionFromIndex(Start) == 0 &&
+ "PHI def index points at actual instruction.");
+ ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -440,10 +426,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
- VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
- false, // update at *
- VNInfoAllocator);
- ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+ VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
@@ -481,7 +464,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineInstr *CopyMI = NULL;
if (mi->isCopyLike())
CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
@@ -504,10 +487,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineInstr *CopyMI) {
// A physical register cannot be live across basic block, so its
// lifetime must end somewhere in its defining basic block.
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
SlotIndex start = baseIndex.getDefIndex();
@@ -573,11 +553,11 @@ exit:
assert(start < end && "did not find end of interval?");
// Already exists? Extend old live interval.
- LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
- bool Extend = OldLR != interval.end();
- VNInfo *ValNo = Extend
- ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
- if (MO.isEarlyClobber() && Extend)
+ VNInfo *ValNo = interval.getVNInfoAt(start);
+ bool Extend = ValNo != 0;
+ if (!Extend)
+ ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (Extend && MO.isEarlyClobber())
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
@@ -611,10 +591,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
LiveInterval &interval, bool isAlias) {
- DEBUG({
- dbgs() << "\t\tlivein register: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
// be considered a livein.
@@ -672,9 +649,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
}
}
+ SlotIndex defIdx = getMBBStartIdx(MBB);
+ assert(getInstructionFromIndex(defIdx) == 0 &&
+ "PHI def index points at actual instruction.");
VNInfo *vni =
- interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
- 0, false, VNInfoAllocator);
+ interval.getNextValue(defIdx, 0, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -764,10 +743,177 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
return NewLI;
}
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+void LiveIntervals::shrinkToUses(LiveInterval *li) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can't only shrink physical registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Live interval not live into reading instruction");
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+ }
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+
+ // Extend the live range for VNI to be live at Idx.
+ LiveInterval::iterator I = NewLI.find(Idx);
+
+ // Already got it?
+ if (I != NewLI.end() && I->start <= Idx) {
+ assert(I->valno == VNI && "Unexpected existing value number");
+ continue;
+ }
+
+ // Is there already a live range in the block containing Idx?
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx
+ << " in BB#" << MBB->getNumber() << '@' << BlockStart);
+ if (I != NewLI.begin() && (--I)->end > BlockStart) {
+ assert(I->valno == VNI && "Wrong reaching def");
+ DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n");
+ // Is this the first use of a PHIDef in its defining block?
+ if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) {
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ VNInfo *PVNI = li->getVNInfoAt(Stop);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (PVNI) {
+ assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ }
+ }
+
+ // Extend the live range in the block to include Idx.
+ NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI));
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+    if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+}
+
+
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+ MachineBasicBlock *mbb) const {
+ const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+ // If li is not live into a landing pad, we can insert spill code before the
+ // first terminator.
+ if (!lpad || !isLiveInToMBB(li, lpad))
+ return mbb->getFirstTerminator();
+
+ // When there is a landing pad, spill code must go before the call instruction
+ // that can throw.
+ MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+ while (I != B) {
+ --I;
+ if (I->getDesc().isCall())
+ return I;
+ }
+ // The block contains no calls that can throw, so use the first terminator.
+ return mbb->getFirstTerminator();
+}
+
+void LiveIntervals::addKillFlags() {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (mri_->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = I->second;
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A LOAD index indicates an MBB edge.
+ if (RI->end.isLoad())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
/// allow one) virtual register operand, then its uses are implicitly using
/// the register. Returns the virtual register.
@@ -800,18 +946,17 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
SlotIndex UseIdx) const {
- SlotIndex Index = getInstructionIndex(MI);
- VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
- LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
- return UI != li.end() && UI->valno == ValNo;
+ VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+ return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
}
/// isReMaterializable - Returns true if the definition MI of the specified
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
if (DisableReMat)
return false;
@@ -829,7 +974,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
SlotIndex UseIdx = getInstructionIndex(UseMI);
- if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ if (li.getVNInfoAt(UseIdx) != ValNo)
continue;
if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
return false;
@@ -855,9 +1000,10 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
isLoad = false;
for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
i != e; ++i) {
@@ -865,9 +1011,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- if (!VNI->isDefAccurate())
- return false;
MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ if (!ReMatDefMI)
+ return false;
bool DefIsLoad = false;
if (!ReMatDefMI ||
!isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -1010,7 +1156,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (!vrm.isReMaterialized(Reg))
continue;
@@ -1044,7 +1190,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (!mop.isReg())
continue;
unsigned Reg = mop.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (Reg != li.reg)
continue;
@@ -1140,11 +1286,14 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
rewriteImplicitOps(li, MI, NewVReg, vrm);
// Reuse NewVReg for other reads.
+ bool HasEarlyClobber = false;
for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
MachineOperand &mopj = MI->getOperand(Ops[j]);
mopj.setReg(NewVReg);
if (mopj.isImplicit())
rewriteImplicitOps(li, MI, NewVReg, vrm);
+ if (mopj.isEarlyClobber())
+ HasEarlyClobber = true;
}
if (CreatedNewVReg) {
@@ -1190,7 +1339,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
} else {
@@ -1203,8 +1352,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
}
}
if (HasDef) {
- LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ // An early clobber starts at the use slot, except for an early clobber
+ // tied to a use operand (yes, that is a thing).
+ LiveRange LR(HasEarlyClobber && !HasUse ?
+ index.getUseIndex() : index.getDefIndex(),
+ index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
}
@@ -1554,15 +1707,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
return (isDef + isUse) * lc;
}
-void
-LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- normalizeSpillWeight(*NewLIs[i]);
+ NewLIs[i]->weight =
+ normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
}
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
assert(li.isSpillable() && "attempt to spill already spilled interval!");
@@ -1653,8 +1806,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = VNI->isDefAccurate()
- ? getInstructionFromIndex(VNI->def) : 0;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
bool dummy;
if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
// Remember how to remat the def of this val#.
@@ -1926,6 +2078,9 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
unsigned PhysReg, VirtRegMap &vrm) {
unsigned SpillReg = getRepresentativeReg(PhysReg);
+ DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+ << " represented by " << tri_->getName(SpillReg) << '\n');
+
for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
// If there are registers which alias PhysReg, but which are not a
// sub-register of the chosen representative super register. Assert
@@ -1937,15 +2092,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
SmallVector<unsigned, 4> PRegs;
if (hasInterval(SpillReg))
PRegs.push_back(SpillReg);
- else {
- SmallSet<unsigned, 4> Added;
- for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
- if (Added.insert(*AS) && hasInterval(*AS)) {
- PRegs.push_back(*AS);
- for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
- Added.insert(*ASS);
- }
- }
+ for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+ if (hasInterval(*SR))
+ PRegs.push_back(*SR);
+
+ DEBUG({
+ dbgs() << "Trying to spill:";
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+ dbgs() << ' ' << tri_->getName(PRegs[i]);
+ dbgs() << '\n';
+ });
SmallPtrSet<MachineInstr*, 8> SeenMIs;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
@@ -1956,18 +2112,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
continue;
SeenMIs.insert(MI);
SlotIndex Index = getInstructionIndex(MI);
+ bool LiveReg = false;
for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
unsigned PReg = PRegs[i];
LiveInterval &pli = getInterval(PReg);
if (!pli.liveAt(Index))
continue;
- vrm.addEmergencySpill(PReg, MI);
+ LiveReg = true;
SlotIndex StartIdx = Index.getLoadIndex();
SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
- pli.removeRange(StartIdx, EndIdx);
- Cut = true;
- } else {
+ if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Ran out of registers during register allocation!";
@@ -1978,15 +2132,14 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
}
report_fatal_error(Msg.str());
}
- for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
- if (!hasInterval(*AS))
- continue;
- LiveInterval &spli = getInterval(*AS);
- if (spli.liveAt(Index))
- spli.removeRange(Index.getLoadIndex(),
- Index.getNextIndex().getBaseIndex());
- }
+ pli.removeRange(StartIdx, EndIdx);
+ LiveReg = true;
}
+ if (!LiveReg)
+ continue;
+ DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+ vrm.addEmergencySpill(SpillReg, MI);
+ Cut = true;
}
return Cut;
}
@@ -1996,7 +2149,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, true, getVNInfoAllocator());
+ startInst, getVNInfoAllocator());
VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
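A minimal sketch of how a caller might drive the shrinkToUses() hook added in this file after deleting an instruction that read a virtual register. The helper, its name, and the LIS/DeadMI/VReg parameters are illustrative assumptions; only the LiveIntervals member functions themselves come from the patch.

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"

// Illustrative helper (not part of the patch): erase an instruction that used
// VReg, then let LiveIntervals trim the register's live range to the remaining
// uses. shrinkToUses() does not compute reaching defs for new uses and does
// not remove dead defs.
static void eraseUseAndShrink(llvm::LiveIntervals &LIS,
                              llvm::MachineInstr *DeadMI, unsigned VReg) {
  LIS.RemoveMachineInstrFromMaps(DeadMI);   // Drop the slot index mapping first.
  DeadMI->eraseFromParent();                // Delete the dead use.
  LIS.shrinkToUses(&LIS.getInterval(VReg)); // Trim to the uses that remain.
}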
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 0000000..205f28a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,315 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << "LIU " << PrintReg(RepReg, TRI);
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ OS << '[' << start() << ';' << stop() << "):"
+ << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ OS << "Interferences with ";
+ LiveUnion->print(OS, TRI);
+ InterferenceResult IR = firstInterference();
+ while (isInterference(IR)) {
+ OS << " ";
+ IR.print(OS, TRI);
+ OS << '\n';
+ nextInterference(IR);
+ }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments. This may
+// be ok since most virtual registers have very few segments. If we had a data
+// structure that optimized MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+ // Search until reaching the end of the LiveUnion segments.
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ if (IR.VirtRegI == VirtRegEnd)
+ return;
+ while (IR.LiveUnionI.valid()) {
+ // Slowly advance the live virtual reg iterator until we surpass the next
+ // segment in LiveUnion.
+ //
+ // Note: If this is ever used for coalescing of fixed registers and we have
+ // a live vreg with thousands of segments, then change this code to use
+ // upperBound instead.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break; // Retain current (nonoverlapping) LiveUnionI
+
+ // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+ // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+ if (!IR.LiveUnionI.valid())
+ break;
+ if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "upperBound postcondition");
+ break;
+ }
+ }
+ if (!IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+ if (CheckedFirstInterference)
+ return FirstInterference;
+ CheckedFirstInterference = true;
+ InterferenceResult &IR = FirstInterference;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ IR.VirtRegI = VirtReg->end();
+ } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+ // VirtReg starts first, perform double binary search.
+ IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+ if (IR.VirtRegI != VirtReg->end())
+ IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start);
+ } else {
+ // LiveUnion starts first, perform double binary search.
+ IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex());
+ if (IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+ else
+ IR.VirtRegI = VirtReg->end();
+ }
+ findIntersection(FirstInterference);
+ assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+ && "Uninitialized iterator");
+ return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+ assert(isInterference(IR) && "iteration past end of interferences");
+
+ // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
+ // unique overlapping pairs.
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+ if (++IR.VirtRegI == VirtReg->end())
+ return false;
+ }
+ else {
+ if (!(++IR.LiveUnionI).valid()) {
+ IR.VirtRegI = VirtReg->end();
+ return false;
+ }
+ }
+ // Short-circuit findIntersection() if possible.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+ return true;
+
+ // Find the next intersection.
+ findIntersection(IR);
+ return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ InterferenceResult IR = firstInterference();
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentInterferingVReg = NULL;
+ if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+ // Advance the union's iterator to reach an unseen interfering vreg.
+ do {
+ if (IR.LiveUnionI.value() == RecentInterferingVReg)
+ continue;
+
+ if (!isSeenInterference(IR.LiveUnionI.value()))
+ break;
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+
+ } while ((++IR.LiveUnionI).valid());
+ if (!IR.LiveUnionI.valid())
+ break;
+
+ // Advance the VirtReg iterator until surpassing the next segment in
+ // LiveUnion.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break;
+
+ // Check for intersection with the union's segment.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+ if (!IR.LiveUnionI.value()->isSpillable())
+ SeenUnspillableVReg = true;
+
+ if (InterferingVRegs.size() == MaxInterferingRegs)
+ // Leave SeenAllInterferences set to false to indicate that at least one
+ // interference exists beyond those we collected.
+ return MaxInterferingRegs;
+
+ InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+ ++IR.LiveUnionI;
+ continue;
+ }
+ // VirtRegI may have advanced far beyond LiveUnionI,
+ // do a fast intersection test to "catch up"
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+ // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+ // overlaps.
+ IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+ Overlaps(LiveUnion->getMap(), Loop->getMap());
+ if (!Overlaps.valid())
+ return false;
+
+ // The loop is overlapping an LIU assignment. Check VirtReg as well.
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+ for (;;) {
+ if (VRI == VirtReg->end())
+ return false;
+ if (VRI->start < Overlaps.stop())
+ return true;
+
+ Overlaps.advanceTo(VRI->start);
+ if (!Overlaps.valid())
+ return false;
+ if (Overlaps.start() < VRI->end)
+ return true;
+
+ VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+ }
+}
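A usage sketch for the new LiveIntervalUnion/Query interface, under the assumption that a register allocator keeps one union per physical register. The tryAssign helper and the ownership of the objects are assumptions; the Query, unify and extract calls are the ones defined in the file above.

#include "LiveIntervalUnion.h"

// Illustrative allocator step (not part of the patch): test a virtual register
// against the segments already assigned to a physical register, and merge it
// in if there is no interference.
static bool tryAssign(llvm::LiveIntervalUnion &PhysRegUnion,
                      llvm::LiveInterval &VirtReg) {
  llvm::LiveIntervalUnion::Query Q(&VirtReg, &PhysRegUnion);
  if (Q.checkInterference())
    return false;               // Overlaps a previously assigned live interval.
  PhysRegUnion.unify(VirtReg);  // Merge VirtReg's segments into the union.
  return true;
}
// Eviction would later call PhysRegUnion.extract(VirtReg) to undo the merge.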
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 0000000..6f9c5f4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,258 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Mapping SlotIndex intervals to virtual register numbers.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class InterferenceResult;
+ class Query;
+
+private:
+ const unsigned RepReg; // representative register number
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+ {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+  /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Cache a single interference test result in the form of two intersecting
+ /// segments. This allows efficiently iterating over the interferences. The
+ /// iteration logic is handled by LiveIntervalUnion::Query which may
+ /// filter interferences depending on the type of query.
+ class InterferenceResult {
+ friend class Query;
+
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+
+ // Internal ctor.
+ InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+ : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+ public:
+ // Public default ctor.
+ InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+ /// start - Return the start of the current overlap.
+ SlotIndex start() const {
+ return std::max(VirtRegI->start, LiveUnionI.start());
+ }
+
+ /// stop - Return the end of the current overlap.
+ SlotIndex stop() const {
+ return std::min(VirtRegI->end, LiveUnionI.stop());
+ }
+
+ /// interference - Return the register that is interfering here.
+ LiveInterval *interference() const { return LiveUnionI.value(); }
+
+ // Note: this interface provides raw access to the iterators because the
+ // result has no way to tell if it's valid to dereference them.
+
+ // Access the VirtReg segment.
+ LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+ // Access the LiveUnion segment.
+ const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+ bool operator==(const InterferenceResult &IR) const {
+ return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+ }
+ bool operator!=(const InterferenceResult &IR) const {
+ return !operator==(IR);
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+ };
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ InterferenceResult FirstInterference;
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag;
+
+ public:
+ Query(): LiveUnion(), VirtReg() {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ }
+
+ void init(LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ bool isInterference(const InterferenceResult &IR) const {
+ if (IR.VirtRegI != VirtReg->end()) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "invalid segment iterators");
+ return true;
+ }
+ return false;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return isInterference(firstInterference()); }
+
+ // Get the first pair of interfering segments, or a noninterfering result.
+    // This initializes the FirstInterference cache.
+ const InterferenceResult &firstInterference();
+
+ // Treat the result as an iterator and advance to the next interfering pair
+    // of segments. Visiting each unique interfering pair means that the same
+ // VirtReg or LiveUnion segment may be visited multiple times.
+ bool nextInterference(InterferenceResult &IR) const;
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ /// checkLoopInterference - Return true if there is interference overlapping
+ /// Loop.
+ bool checkLoopInterference(MachineLoopRange*);
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+ private:
+ Query(const Query&); // DO NOT IMPLEMENT
+ void operator=(const Query&); // DO NOT IMPLEMENT
+
+ // Private interface for queries
+ void findIntersection(InterferenceResult &IR) const;
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
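The getTag()/changedSince() pair is what makes Query::init() cheap to call repeatedly: cached interference results survive as long as the union's tag is unchanged. A small sketch, assuming the caller keeps a persistent Query object; the countInterferences helper is an illustrative name, not part of the interface above.

// Illustrative use of the cached query (not part of the patch): init() retains
// the previously computed interference when neither the virtual register nor
// the union's tag has changed since the last call.
static unsigned countInterferences(llvm::LiveIntervalUnion::Query &Q,
                                   llvm::LiveInterval &VirtReg,
                                   llvm::LiveIntervalUnion &Union) {
  Q.init(&VirtReg, &Union);            // Cheap no-op if nothing has changed.
  Q.collectInterferingVRegs();         // Visit all overlapping segments once.
  return Q.interferingVRegs().size();  // Unique interfering virtual registers.
}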
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 0000000..3bbda1c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,129 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri,
+ LiveIntervals &lis,
+ VirtRegMap &vrm) {
+ const TargetRegisterClass *RC = mri.getRegClass(getReg());
+ unsigned VReg = mri.createVirtualRegister(RC);
+ vrm.grow();
+ vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg()));
+ LiveInterval &li = lis.getOrCreateInterval(VReg);
+ newRegs_.push_back(&li);
+ return li;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+ E = parent_.vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ if (tii.isTriviallyReMaterializable(DefMI, aa))
+ remattable_.insert(VNI);
+ }
+ scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ if (!scannedRemattable_)
+ scanRemattable(lis, tii, aa);
+ return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx,
+ LiveIntervals &lis) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg())
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ continue;
+ // We don't want to move any defs.
+ if (MO.isDef())
+ return false;
+ // We cannot depend on virtual registers in uselessRegs_.
+ for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui)
+ if (uselessRegs_[ui]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &li = lis.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis) {
+ assert(scannedRemattable_ && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!remattable_.count(RM.ParentVNI))
+ return false;
+
+ // No defining instruction.
+ RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def);
+ assert(RM.OrigMI && "Defining instruction for remattable value disappeared");
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri) {
+ assert(RM.OrigMI && "Invalid remat");
+ tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ rematted_.insert(RM.ParentVNI);
+ return lis.InsertMachineInstrInMaps(--MI).getDefIndex();
+}
+
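A sketch of how a spiller might use these helpers to recompute a value next to a use instead of reloading it from a stack slot. The function name and the way Edit, UseMI, UseIdx and NewVReg are obtained are assumptions; the LiveRangeEdit calls are the ones defined above, and passing a null AliasAnalysis is merely conservative.

#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Illustrative spiller step (not part of the patch): try to rematerialize the
// parent value live at UseIdx into NewVReg, right before the use.
static bool tryRematBeforeUse(llvm::LiveRangeEdit &Edit,
                              llvm::LiveIntervals &LIS,
                              const llvm::TargetInstrInfo &TII,
                              const llvm::TargetRegisterInfo &TRI,
                              llvm::MachineBasicBlock &MBB,
                              llvm::MachineBasicBlock::iterator UseMI,
                              llvm::SlotIndex UseIdx, unsigned NewVReg) {
  if (!Edit.anyRematerializable(LIS, TII, 0))
    return false;                                   // Nothing can be recomputed.
  llvm::VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(UseIdx);
  if (!ParentVNI)
    return false;                                   // Parent is not live here.
  llvm::LiveRangeEdit::Remat RM(ParentVNI);
  if (!Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/false, LIS))
    return false;
  // The new def is mapped into the slot indexes; updating liveness for NewVReg
  // is left to the caller.
  Edit.rematerializeAt(MBB, UseMI, NewVReg, RM, LIS, TII, TRI);
  return true;
}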
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.h b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 0000000..73f69ed
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,135 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ const SmallVectorImpl<LiveInterval*> &uselessRegs_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+  ///                empty initially; any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ const SmallVectorImpl<LiveInterval*> &uselessRegs)
+ : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ /// create - Create a new register with the same class and stack slot as
+ /// parent.
+ LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+  /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+  /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&);
+
+ /// markRematerialized - explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index b5c385f..c75196a 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -26,7 +26,9 @@ using namespace llvm;
char LiveStacks::ID = 0;
INITIALIZE_PASS(LiveStacks, "livestacks",
- "Live Stack Slot Analysis", false, false);
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -48,6 +50,22 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) {
return false;
}
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot indice must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
/// print - Implement the dump method.
void LiveStacks::print(raw_ostream &OS, const Module*) const {
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 375307b..dd43ef2 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -42,8 +41,11 @@
using namespace llvm;
char LiveVariables::ID = 0;
-INITIALIZE_PASS(LiveVariables, "livevars",
- "Live Variable Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -79,13 +81,7 @@ void LiveVariables::VarInfo::dump() const {
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
"getVarInfo: not a virtual register!");
- RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
- if (RegIdx >= VirtRegInfo.size()) {
- if (RegIdx >= 2*VirtRegInfo.size())
- VirtRegInfo.resize(RegIdx*2);
- else
- VirtRegInfo.resize(2*VirtRegInfo.size());
- }
+ VirtRegInfo.grow(RegIdx);
return VirtRegInfo[RegIdx];
}
@@ -498,9 +494,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
- /// Get some space for a respectable number of registers.
- VirtRegInfo.resize(64);
-
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -628,19 +621,14 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
- for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
- for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
- if (VirtRegInfo[i].Kills[j] ==
- MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
- VirtRegInfo[i]
- .Kills[j]->addRegisterDead(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
else
- VirtRegInfo[i]
- .Kills[j]->addRegisterKilled(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
// Check to make sure there are no unreachable blocks in the MC CFG for the
// function. If so, it is due to a bug in the instruction selector or some
@@ -775,8 +763,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
// Update info for all live variables
- for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
- E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
VarInfo &VI = getVarInfo(Reg);
if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
VI.AliveBlocks.set(NumNew);
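The LiveVariables changes above switch from offsetting by FirstVirtualRegister to the index-based virtual register numbering. A small sketch of the new iteration idiom, assuming MRI is a function's MachineRegisterInfo; the helper and its name are illustrative, while getNumVirtRegs(), index2VirtReg() and reg_nodbg_empty() all appear in the patch.

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Illustrative helper (not part of the patch): walk all virtual registers by
// index and count those that still have non-debug uses or defs.
static unsigned countUsedVirtRegs(llvm::MachineRegisterInfo &MRI) {
  unsigned Count = 0;
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = llvm::TargetRegisterInfo::index2VirtReg(i);
    if (!MRI.reg_nodbg_empty(Reg))
      ++Count;
  }
  return Count;
}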
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 7e366f0..1318d62 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -9,7 +9,7 @@
//
// This pass assigns local frame indices to stack slots relative to one another
// and allocates additional base registers to access them when the target
-// estimates the are likely to be out of range of stack pointer and frame
+// estimates they are likely to be out of range of stack pointer and frame
// pointer relative addressing.
//
//===----------------------------------------------------------------------===//
@@ -34,7 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
using namespace llvm;
@@ -152,9 +152,9 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
unsigned MaxAlign = 0;
@@ -227,27 +227,28 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
- MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Collect all of the instructions in the block that reference
// a frame index. Also store the frame index referenced to ease later
// lookup. (For any insn that has more than one FI reference, we arbitrarily
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
- // A base register definition is a register+offset pair.
- SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
MachineInstr *MI = I;
+
// Debug value instructions can't be out of range, so they don't need
// any updates.
if (MI->isDebugValue())
continue;
+
// For now, allocate the base register(s) within the basic block
// where they're used, and don't try to keep them around outside
// of that. It may be beneficial to try sharing them more broadly
@@ -268,11 +269,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
}
}
}
+
// Sort the frame references by local offset
array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ MachineBasicBlock *Entry = Fn.begin();
- // Loop throught the frame references and allocate for them as necessary
+ // Loop through the frame references and allocate for them as necessary.
for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
MachineBasicBlock::iterator I =
FrameReferenceInsns[ref].getMachineInstr();
@@ -321,10 +324,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
DEBUG(dbgs() << " Materializing base register " << BaseReg <<
" at frame local offset " <<
LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
// Tell the target to insert the instruction to initialize
// the base register.
- TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
- FrameIdx, InstrOffset);
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
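
The pass collects every frame-index reference, sorts them by local offset, and materializes a new base register whenever the next reference can no longer be reached from the previous base. A self-contained sketch of that greedy scheme, using a made-up addressing Range and toy FrameRef/BaseReg types rather than LLVM's classes, might look like:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// One frame reference: the local offset an (abstract) instruction addresses.
struct FrameRef { int64_t LocalOffset; int InstrId; };
// A base register definition is a register + offset pair.
struct BaseReg  { unsigned Reg; int64_t Offset; };

int main() {
  const int64_t Range = 256;   // hypothetical reachable displacement

  std::vector<FrameRef> Refs = {{300, 3}, {0, 1}, {40, 2}, {310, 4}, {900, 5}};

  // Sort the frame references by local offset, as the pass does.
  std::sort(Refs.begin(), Refs.end(),
            [](const FrameRef &A, const FrameRef &B) {
              return A.LocalOffset < B.LocalOffset;
            });

  std::vector<BaseReg> Bases;
  unsigned NextVReg = 100;     // stand-in for creating a virtual register

  for (const FrameRef &R : Refs) {
    // Materialize a new base register once the offset is out of range of the
    // most recent one; otherwise keep reusing it.
    if (Bases.empty() || R.LocalOffset - Bases.back().Offset >= Range) {
      Bases.push_back(BaseReg{NextVReg++, R.LocalOffset});
      std::cout << "materialize base reg %" << Bases.back().Reg
                << " at local offset " << R.LocalOffset << "\n";
    }
    std::cout << "  instr " << R.InstrId << " addresses %" << Bases.back().Reg
              << " + " << (R.LocalOffset - Bases.back().Offset) << "\n";
  }
}

Sorting first is what lets a single base register cover a whole run of nearby slots before another one has to be created.
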
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 50f3f67..ccbff0a 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -146,27 +147,46 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
return I;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ return I;
+}
+
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
- while (I != begin() && (--I)->getDesc().isTerminator())
+ while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
; /*noop */
- if (I != end() && !I->getDesc().isTerminator()) ++I;
+ while (I != end() && !I->getDesc().isTerminator())
+ ++I;
return I;
}
-void MachineBasicBlock::dump() const {
- print(dbgs());
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ iterator B = begin(), I = end();
+ while (I != B) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
}
-static inline void OutputReg(raw_ostream &os, unsigned RegNo,
- const TargetRegisterInfo *TRI = 0) {
- if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
- if (TRI)
- os << " %" << TRI->get(RegNo).Name;
- else
- os << " %physreg" << RegNo;
- } else
- os << " %reg" << RegNo;
+void MachineBasicBlock::dump() const {
+ print(dbgs());
}
StringRef MachineBasicBlock::getName() const {
@@ -176,7 +196,7 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
-void MachineBasicBlock::print(raw_ostream &OS) const {
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -186,6 +206,9 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
OS << "BB#" << getNumber() << ": ";
const char *Comma = "";
@@ -198,28 +221,36 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
OS << '\n';
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (!livein_empty()) {
+ if (Indexes) OS << '\t';
OS << " Live Ins:";
for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
- OutputReg(OS, *I, TRI);
+ OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
// Print the preds of this block according to the CFG.
if (!pred_empty()) {
+ if (Indexes) OS << '\t';
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
OS << " BB#" << (*PI)->getNumber();
OS << '\n';
}
-
+
for (const_iterator I = begin(); I != end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
OS << '\t';
I->print(OS, &getParent()->getTarget());
}
// Print the successors of this block according to the CFG.
if (!succ_empty()) {
+ if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
OS << " BB#" << (*SI)->getNumber();
@@ -431,14 +462,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
- // We may need to update this's terminator, but we can't do that if AnalyzeBranch
- // fails. If this uses a jump table, we won't touch it.
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
return NULL;
+ // Avoid bugpoint weirdness: A block may end with a conditional branch but
+ // jump to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
DEBUG(dbgs() << "Splitting critical edge:"
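
getLastNonDebugInstr above walks backwards from end() and treats a block consisting only of DBG_VALUE instructions as empty. The same reverse-scan pattern over an ordinary container, sketched with a hypothetical Instr type rather than MachineInstr, is roughly:

#include <iostream>
#include <vector>

struct Instr { bool IsDebugValue; int Id; };

// Return an iterator to the last non-debug instruction, or end() when the
// block contains nothing but debug values.
std::vector<Instr>::iterator lastNonDebug(std::vector<Instr> &Block) {
  std::vector<Instr>::iterator B = Block.begin(), I = Block.end();
  while (I != B) {
    --I;
    if (I->IsDebugValue)
      continue;
    return I;
  }
  return Block.end();
}

int main() {
  std::vector<Instr> Block = {{false, 1}, {false, 2}, {true, 3}};
  std::vector<Instr>::iterator I = lastNonDebug(Block);
  if (I != Block.end())
    std::cout << "last real instruction: " << I->Id << "\n"; // prints 2
}
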
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 272b54d..07a7d27 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -22,15 +22,18 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
-STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -41,7 +44,9 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -61,10 +66,13 @@ namespace {
private:
const unsigned LookAheadLimit;
- typedef ScopedHashTableScope<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> ScopeType;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
- ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+ ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
@@ -72,11 +80,11 @@ namespace {
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const ;
- bool hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const;
- bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -91,8 +99,12 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
-INITIALIZE_PASS(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false);
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
@@ -104,7 +116,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (!MRI->hasOneNonDBGUse(Reg))
// Only coalesce single use copies. This ensures the copy will be
@@ -120,17 +132,12 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
continue;
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
continue;
- const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
- if (!NewRC)
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
+ DEBUG(dbgs() << "*** to: " << *MI);
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
- if (NewRC != SRC)
- MRI->setRegClass(SrcReg, NewRC);
DefMI->eraseFromParent();
++NumCoalesces;
Changed = true;
@@ -176,14 +183,14 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
-/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads/writes
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
/// instruction does not use a physical register.
-bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const {
- PhysDef = 0;
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -193,35 +200,22 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isUse()) {
- // Can't touch anything to read a physical register.
- PhysDef = 0;
- return true;
- }
- if (MO.isDead())
- // If the def is dead, it's ok.
- continue;
- // Ok, this is a physical register def that's not marked "dead". That's
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
- if (PhysDef) {
- // Multiple physical register defs. These are rare, forget about it.
- PhysDef = 0;
- return true;
- }
- PhysDef = Reg;
+ if (MO.isDef() &&
+ (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+ continue;
+ PhysRefs.insert(Reg);
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ PhysRefs.insert(*Alias);
}
- if (PhysDef) {
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
- if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
- return true;
- }
- return false;
+ return !PhysRefs.empty();
}
-bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const {
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const {
// For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction.
MachineBasicBlock *MBB = MI->getParent();
@@ -237,8 +231,17 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
if (I == E)
return true;
- if (I->modifiesRegister(PhysDef, TRI))
- return false;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
--LookAheadLeft;
++I;
@@ -259,7 +262,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Ignore stuff that we obviously can't move.
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects())
+ MI->hasUnmodeledSideEffects())
return false;
if (TID.mayLoad()) {
@@ -281,14 +284,13 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that work around the lack of live range splitting.
- // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
- // immediate predecessor. We don't want to increase register pressure and end up
- // causing other computation to be spilled.
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computation to be spilled.
if (MI->getDesc().isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
- if (CSBB != BB &&
- find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
}
@@ -297,7 +299,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
bool HasVRegUse = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() &&
+ if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
break;
@@ -359,7 +361,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
if (!isCSECandidate(MI))
continue;
- bool DefPhys = false;
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
@@ -370,24 +371,37 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
FoundCSE = VNT.count(MI);
}
}
- // FIXME: commute commutable instructions?
- // If the instruction defines a physical register and the value *may* be
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->getDesc().isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI)
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
- unsigned PhysDef = 0;
- if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
+ // It's also not safe if the instruction uses physical registers.
+ SmallSet<unsigned,8> PhysRefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
FoundCSE = false;
// ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between.
- if (PhysDef) {
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
- FoundCSE = true;
- DefPhys = true;
- }
- }
+ // which is not clobbered in between and the physical register uses
+ // were not clobbered.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ FoundCSE = true;
}
if (!FoundCSE) {
@@ -432,8 +446,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
}
MI->eraseFromParent();
++NumCSEs;
- if (DefPhys)
+ if (!PhysRefs.empty())
++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
} else {
DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
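
The pass's overall shape is unchanged: instructions are value-numbered as they are visited, and a later instruction whose expression already has a number is deleted in favor of the earlier def. A toy, self-contained illustration of that value-numbering idea (a plain std::map over a made-up three-address Instr type, standing in for the ScopedHashTable keyed by MachineInstrExpressionTrait) could be:

#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

// A toy three-address instruction: dst = op(lhs, rhs).
struct Instr { std::string Op; int Dst, Lhs, Rhs; };

int main() {
  std::vector<Instr> Block = {
      {"add", 3, 1, 2},
      {"mul", 4, 3, 3},
      {"add", 5, 1, 2},  // same expression as the first add
      {"mul", 6, 5, 5},  // identical to the second mul once %5 becomes %3
  };

  // "Value number table": expression -> register holding its value.
  std::map<std::tuple<std::string, int, int>, int> VNT;
  // Registers already CSE'd away, mapped to the register that replaces them.
  std::map<int, int> Replacement;

  for (Instr &I : Block) {
    // Rewrite operands through earlier replacements first.
    if (Replacement.count(I.Lhs)) I.Lhs = Replacement[I.Lhs];
    if (Replacement.count(I.Rhs)) I.Rhs = Replacement[I.Rhs];

    std::tuple<std::string, int, int> Key(I.Op, I.Lhs, I.Rhs);
    std::map<std::tuple<std::string, int, int>, int>::iterator It = VNT.find(Key);
    if (It != VNT.end()) {
      // Common subexpression: reuse the earlier def, drop this instruction.
      Replacement[I.Dst] = It->second;
      std::cout << "CSE: %" << I.Dst << " replaced by %" << It->second << "\n";
      continue;
    }
    VNT[Key] = I.Dst;
    std::cout << "keep: %" << I.Dst << " = " << I.Op << " %" << I.Lhs
              << ", %" << I.Rhs << "\n";
  }
}

The Replacement map plays the role of the operand rewriting the real pass performs when it deletes a duplicate instruction.
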
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index 3c67478..04c8ecb 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -25,7 +25,7 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
char MachineDominatorTree::ID = 0;
INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
- "MachineDominator Tree Construction", true, true);
+ "MachineDominator Tree Construction", true, true)
char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
@@ -42,6 +42,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
DT = new DominatorTreeBase<MachineBasicBlock>(false);
}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index 0171700..8553240 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -33,7 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GraphWriter.h"
@@ -52,14 +52,15 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
}
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
- unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
if (TM.getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
else
RegInfo = 0;
MFInfo = 0;
- FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
if (Fn->hasFnAttr(Attribute::StackAlignment))
FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
Fn->getAttributes().getFnAttributes()));
@@ -190,20 +191,21 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
}
MachineMemOperand *
-MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
- int64_t o, uint64_t s,
- unsigned base_alignment) {
- return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo);
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
return new (Allocator)
- MachineMemOperand(MMO->getValue(), MMO->getFlags(),
- int64_t(uint64_t(MMO->getOffset()) +
- uint64_t(Offset)),
- Size, MMO->getBaseAlignment());
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
}
MachineInstr::mmo_iterator
@@ -231,10 +233,10 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
else {
// Clone the MMO and unset the store flag.
MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustLoad;
}
++Index;
@@ -263,10 +265,10 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
else {
// Clone the MMO and unset the load flag.
MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustStore;
}
++Index;
@@ -279,7 +281,7 @@ void MachineFunction::dump() const {
print(dbgs());
}
-void MachineFunction::print(raw_ostream &OS) const {
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << "# Machine code for function " << Fn->getName() << ":\n";
// Print Frame Information
@@ -328,7 +330,7 @@ void MachineFunction::print(raw_ostream &OS) const {
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
- BB->print(OS);
+ BB->print(OS, Indexes);
}
OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
@@ -346,17 +348,15 @@ namespace llvm {
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineFunction *Graph) {
- if (isSimple () && Node->getBasicBlock() &&
- !Node->getBasicBlock()->getName().empty())
- return Node->getBasicBlock()->getNameStr() + ":";
-
std::string OutStr;
{
raw_string_ostream OSS(OutStr);
-
- if (isSimple())
- OSS << Node->getNumber() << ':';
- else
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
Node->print(OSS);
}
@@ -396,7 +396,8 @@ void MachineFunction::viewCFGOnly() const
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
- const TargetRegisterClass *RC) {
+ const TargetRegisterClass *RC,
+ DebugLoc DL) {
MachineRegisterInfo &MRI = getRegInfo();
unsigned VReg = MRI.getLiveInVirtReg(PReg);
if (VReg) {
@@ -405,6 +406,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
}
VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(PReg, VReg);
+ MRI.addLiveInLoc(VReg, DL);
return VReg;
}
@@ -426,6 +428,13 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
return Ctx.GetOrCreateSymbol(Name.str());
}
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
//===----------------------------------------------------------------------===//
// MachineFrameInfo implementation
@@ -485,7 +494,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
- const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
OS << "Frame Objects:\n";
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
index 4f84b95..054c750 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -12,22 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
using namespace llvm;
-// Register this pass with PassInfo directly to avoid having to define
-// a default constructor.
-static PassInfo
-X("Machine Function Analysis", "machine-function-analysis",
- &MachineFunctionAnalysis::ID, 0,
- /*CFGOnly=*/false, /*is_analysis=*/true);
-
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
MachineFunctionAnalysis::~MachineFunctionAnalysis() {
@@ -52,7 +47,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>());
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 446e461..aa9ea61 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -102,13 +102,13 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
RemoveRegOperandFromRegInfo();
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
AddRegOperandToRegInfo(&MF->getRegInfo());
return;
}
// Otherwise, just change the register, no problem. :)
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
}
void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
@@ -159,7 +159,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
} else {
// Otherwise, change this to a register and set the reg#.
OpKind = MO_Register;
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
// If this operand is embedded in a function, add the operand to the
// register's use/def list.
@@ -227,24 +227,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
switch (getType()) {
case MachineOperand::MO_Register:
- if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
- OS << "%reg" << getReg();
- } else {
- if (TM)
- OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
- else
- OS << "%physreg" << getReg();
- }
-
- if (getSubReg() != 0) {
- if (TM)
- OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
- else
- OS << ':' << getSubReg();
- }
+ OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
isEarlyClobber()) {
@@ -335,10 +322,45 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
// MachineMemOperand Implementation
//===----------------------------------------------------------------------===//
-MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
- int64_t o, uint64_t s, unsigned int a)
- : Offset(o), Size(s), V(v),
- Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) {
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
}
@@ -346,9 +368,9 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
/// Profile - Gather unique data for the object.
///
void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(Offset);
+ ID.AddInteger(getOffset());
ID.AddInteger(Size);
- ID.AddPointer(V);
+ ID.AddPointer(getValue());
ID.AddInteger(Flags);
}
@@ -364,8 +386,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
- V = MMO->getValue();
- Offset = MMO->getOffset();
+ PtrInfo = MMO->PtrInfo;
}
}
@@ -410,6 +431,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
MMO.getBaseAlignment() != MMO.getSize())
OS << "(align=" << MMO.getAlignment() << ")";
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
return OS;
}
@@ -782,6 +813,14 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
@@ -881,14 +920,15 @@ int MachineInstr::findFirstPredOperandIdx() const {
bool MachineInstr::
isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
if (isInlineAsm()) {
- assert(DefOpIdx >= 3);
+ assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
const MachineOperand &MO = getOperand(DefOpIdx);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
return false;
// Determine the actual operand index that corresponds to this index.
unsigned DefNo = 0;
unsigned DefPart = 0;
- for (unsigned i = 2, e = getNumOperands(); i < e; ) {
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i < e; ) {
const MachineOperand &FMO = getOperand(i);
// After the normal asm operands there may be additional imp-def regs.
if (!FMO.isImm())
@@ -903,7 +943,8 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
}
++DefNo;
}
- for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i != e; ++i) {
const MachineOperand &FMO = getOperand(i);
if (!FMO.isImm())
continue;
@@ -946,7 +987,8 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
// Find the flag operand corresponding to UseOpIdx
unsigned FlagIdx, NumOps=0;
- for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ for (FlagIdx = InlineAsm::MIOp_FirstOperand;
+ FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
const MachineOperand &UFMO = getOperand(FlagIdx);
// After the normal asm operands there may be additional imp-def regs.
if (!UFMO.isImm())
@@ -964,9 +1006,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
if (!DefOpIdx)
return true;
- unsigned DefIdx = 2;
+ unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
// Remember to adjust the index. First operand is asm string, second is
- // the AlignStack bit, then there is a flag for each.
+ // the HasSideEffects and AlignStack bits, then there is a flag for each.
while (DefNo) {
const MachineOperand &FMO = getOperand(DefIdx);
assert(FMO.isImm());
@@ -1071,7 +1113,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
SawStore = true;
return false;
}
- if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+
+ if (isLabel() || isDebugValue() ||
+ TID->isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1122,7 +1166,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
if (!TID->mayStore() &&
!TID->mayLoad() &&
!TID->isCall() &&
- !TID->hasUnmodeledSideEffects())
+ !hasUnmodeledSideEffects())
return false;
// Otherwise, if the instruction has no memory reference information,
@@ -1166,7 +1210,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if (PSV->isConstant(MFI))
continue;
// If we have an AliasAnalysis, ask it whether the memory is constant.
- if (AA && AA->pointsToConstantMemory(V))
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
continue;
}
@@ -1194,6 +1240,18 @@ unsigned MachineInstr::isConstantValuePHI() const {
return Reg;
}
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (getDesc().hasUnmodeledSideEffects())
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
@@ -1207,6 +1265,17 @@ bool MachineInstr::allDefsAreDead() const {
return true;
}
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MO);
+ }
+}
+
void MachineInstr::dump() const {
dbgs() << " " << *this;
}
@@ -1257,7 +1326,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, TM);
unsigned Reg = getOperand(StartOp).getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
}
@@ -1270,11 +1339,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
+
+ if (isInlineAsm()) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+
+ StartOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegs.push_back(MO.getReg());
// Omit call-clobbered registers which aren't used anywhere. This makes
@@ -1284,7 +1370,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (MF && getDesc().isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
@@ -1348,14 +1434,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
++j;
continue;
}
if (VirtRegs[i] != VirtRegs[j])
- OS << "," << VirtRegs[j];
+ OS << "," << PrintReg(VirtRegs[j]);
VirtRegs.erase(VirtRegs.begin()+j);
}
}
@@ -1533,8 +1619,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
- if (MO.isDef() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
Key |= MO.getReg();
break;
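
isStackAligningInlineAsm and hasUnmodeledSideEffects both test bits of the single extra-info immediate operand carried by an INLINEASM instruction. Stripped of the MachineInstr machinery, the packing scheme is an ordinary bitmask in one integer; the bit values below are illustrative stand-ins and not necessarily the constants InlineAsm defines:

#include <cassert>
#include <iostream>

// Hypothetical stand-ins for the InlineAsm extra-info bits.
enum ExtraInfoBits {
  Extra_HasSideEffects = 1 << 0,
  Extra_IsAlignStack   = 1 << 1
};

struct ToyInlineAsm {
  unsigned ExtraInfo;
  bool hasSideEffects() const { return (ExtraInfo & Extra_HasSideEffects) != 0; }
  bool isAlignStack() const   { return (ExtraInfo & Extra_IsAlignStack) != 0; }
};

int main() {
  // Both properties are packed into the one immediate when the asm is built.
  ToyInlineAsm Asm = {Extra_HasSideEffects | Extra_IsAlignStack};

  assert(Asm.hasSideEffects() && Asm.isAlignStack());
  // Printing mirrors the [sideeffect]/[alignstack] annotations added above.
  if (Asm.hasSideEffects()) std::cout << " [sideeffect]";
  if (Asm.isAlignStack())   std::cout << " [alignstack]";
  std::cout << "\n";
}
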
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 1a74b74..443fc2d 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -28,8 +28,10 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
@@ -40,8 +42,14 @@
using namespace llvm;
-STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
-STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
@@ -51,9 +59,11 @@ namespace {
const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
- MachineRegisterInfo *RegInfo;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -68,23 +78,37 @@ namespace {
BitVector AllocatableSet;
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
// For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
- MachineFunctionPass(ID), PreRegAlloc(true) {}
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Machine Instruction LICM"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AliasAnalysis>();
@@ -94,6 +118,13 @@ namespace {
}
virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
CSEMap.clear();
}
@@ -138,6 +169,24 @@ namespace {
///
bool IsLoopInvariantInst(MachineInstr &I);
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and an use in the current loop, return true if the target considered
+ /// and a use in the current loop; return true if the target considers
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB,
+ /// check if hoisting an instruction of the given cost matrix can cause high
+ /// register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
/// IsProfitableToHoist - Return true if it is potentially profitable to
/// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
@@ -148,11 +197,16 @@ namespace {
/// visit definitions before uses, allowing us to hoist a loop body in one
/// pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
- /// isLoadFromConstantMemory - Return true if the given instruction is a
- /// load from constant memory.
- bool isLoadFromConstantMemory(MachineInstr *MI);
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
@@ -174,8 +228,8 @@ namespace {
/// Hoist - When an instruction is found to only use loop invariant operands
/// that is safe to hoist, this instruction is called to do the dirty work.
- ///
- void Hoist(MachineInstr *MI);
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
/// InitCSEMap - Initialize the CSE map with instructions that are in the
/// current loop preheader that may become duplicates of instructions that
@@ -189,8 +243,13 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
-INITIALIZE_PASS(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
@@ -212,18 +271,32 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
TRI = TM->getRegisterInfo();
MFI = MF.getFrameInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
AllocatableSet = TRI->getAllocatableSet(MF);
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF);
+ }
+
// Get our Loop information...
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
@@ -248,7 +321,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N);
+ HoistRegion(N, true);
CSEMap.clear();
}
}
@@ -474,17 +547,33 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
assert(N != 0 && "Null dominator tree node?");
MachineBasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ if (IsHeader) {
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- Hoist(&*MII);
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
MII = NextMII;
}
@@ -496,6 +585,99 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
for (unsigned I = 0, E = Children.size(); I != E; ++I)
HoistRegion(Children[I]);
}
+
+ BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (MO.isDef())
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ }
+}
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
+ }
}
/// IsLICMCandidate - Returns true if the instruction may be a suitable
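
InitRegPressure and UpdateRegPressure keep one counter per register class: a def adds the class cost, the kill of an already-seen register subtracts it, and a register first seen as a non-killed use counts as a live-in. A much-reduced version of that bookkeeping, with a single register class, unit costs, simplified operand ordering, and toy operand/instruction types, is sketched below:

#include <iostream>
#include <set>
#include <vector>

// A toy operand: a virtual register, flagged as a def or a use, where a use
// may be a kill (the register's last use).
struct Operand { unsigned Reg; bool IsDef; bool IsKill; };
typedef std::vector<Operand> ToyInstr;

int main() {
  std::vector<ToyInstr> Block = {
    {{1, true, false}},                     // %1 = ...
    {{2, true, false}, {1, false, true}},   // %2 = op killed %1
    {{3, true, false}, {2, false, false}}   // %3 = op %2   (%2 stays live)
  };

  std::set<unsigned> Seen;   // plays the role of RegSeen
  int Pressure = 0;          // one register class, unit cost per value

  for (const ToyInstr &MI : Block) {
    for (const Operand &MO : MI) {
      bool IsNew = Seen.insert(MO.Reg).second;
      if (MO.IsDef) {
        ++Pressure;                        // a def makes a new value live
      } else if (IsNew && !MO.IsKill) {
        ++Pressure;                        // first seen as a use: a live-in
      } else if (!IsNew && MO.IsKill) {
        if (Pressure > 0) --Pressure;      // last use ends a tracked value
      }
    }
    std::cout << "pressure after instruction: " << Pressure << "\n";
  }
}
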
@@ -535,14 +717,14 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
return false;
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
return false;
@@ -562,12 +744,12 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!MO.isUse())
continue;
- assert(RegInfo->getVRegDef(Reg) &&
+ assert(MRI->getVRegDef(Reg) &&
"Machine instr not mapped for this vreg?!");
// If the loop contains the definition of an operand, then the instruction
// isn't loop invariant.
- if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
return false;
}
@@ -577,9 +759,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
- for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
- UE = RegInfo->use_end(); UI != UE; ++UI) {
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
if (UseMI->isPHI())
return true;
@@ -587,37 +769,210 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
return false;
}
-/// isLoadFromConstantMemory - Return true if the given instruction is a
-/// load from constant memory. Machine LICM will hoist these even if they are
-/// not re-materializable.
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
- if (!MI->getDesc().mayLoad()) return false;
- if (!MI->hasOneMemOperand()) return false;
- MachineMemOperand *MMO = *MI->memoperands_begin();
- if (MMO->isVolatile()) return false;
- if (!MMO->getValue()) return false;
- const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
- if (PSV) {
- MachineFunction &MF = *MI->getParent()->getParent();
- return PSV->isConstant(MF.getFrameInfo());
- } else {
- return AA->pointsToConstantMemory(MMO->getValue());
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and an use in the current loop, return true if the target considered
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+/// if hoisting an instruction of the given cost matrix can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + CI->second >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
}
}
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
+ if (MI.isImplicitDef())
+ return true;
+
+ // If the instruction is cheap, only hoist it if it is re-materializable. LICM
+ // will increase register pressure, and that's probably not worth it if the
+ // instruction is cheap.
// Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
// Hoisting these tends to help performance in low register pressure
// situations. The trade-off is that it may cause spills in high pressure
// situations. It will end up adding a store in the loop preheader. But the
// reload is no more expensive. The side benefit is that these loads are
// frequently CSE'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA)) {
- if (!isLoadFromConstantMemory(&MI))
+ if (IsCheapInstruction(MI)) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
+ return false;
+ } else {
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situations, we can be more aggressive about
+ // hoisting. Also, favor hoisting long-latency instructions even in
+ // moderately high pressure situations.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Visit BBs from the loop header to the current BB; if hoisting this
+ // instruction doesn't cause high register pressure, it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost)) {
+ ++NumLowRP;
+ return true;
+ }
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA))
return false;
}
@@ -628,7 +983,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- if (HasPHIUses(MO.getReg(), RegInfo))
+ if (HasPHIUses(MO.getReg(), MRI))
return false;
}
@@ -636,10 +991,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
}
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->getDesc().canFoldAsLoad())
+ return 0;
+
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!isLoadFromConstantMemory(MI))
+ if (!MI->isInvariantLoad(AA))
return 0;
// Next determine the register class for a temporary register.
@@ -654,7 +1013,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
if (TID.getNumDefs() != 1) return 0;
const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
- unsigned Reg = RegInfo->createVirtualRegister(RC);
+ unsigned Reg = MRI->createVirtualRegister(RC);
MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
@@ -678,6 +1037,10 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
NewMIs[1]->eraseFromParent();
return 0;
}
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
// Otherwise we successfully unfolded a load that we can hoist.
MI->eraseFromParent();
return NewMIs[0];
@@ -686,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
- if (TII->isTriviallyReMaterializable(MI, AA)) {
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.find(Opcode);
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
}
}
@@ -709,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->produceSameValue(MI, PrevMI))
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
return PrevMI;
}
return 0;
@@ -738,8 +1096,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
if (MO.isReg() && MO.isDef() &&
!TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+ MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ MRI->clearKillFlags(Dup->getOperand(i).getReg());
}
}
MI->eraseFromParent();
@@ -752,15 +1110,12 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
///
-void MachineLICM::Hoist(MachineInstr *MI) {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
-
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
MI = ExtractHoistableLoad(MI);
- if (!MI) return;
+ if (!MI) return false;
}
// Now move the instructions to the predecessor, inserting it before any
@@ -791,13 +1146,16 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && !MO.isDead())
- RegInfo->clearKillFlags(MO.getReg());
+ MRI->clearKillFlags(MO.getReg());
}
// Add to the CSE map.
@@ -812,6 +1170,8 @@ void MachineLICM::Hoist(MachineInstr *MI) {
++NumHoisted;
Changed = true;
+
+ return true;
}
MachineBasicBlock *MachineLICM::getCurPreheader() {
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index bca4b0c..189cb2b 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,8 +30,11 @@ TEMPLATE_INSTANTIATION(MLIB);
}
char MachineLoopInfo::ID = 0;
-INITIALIZE_PASS(MachineLoopInfo, "machine-loops",
- "Machine Natural Loop Construction", true, true);
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 0000000..17fe67f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<SlotIndexes>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much; loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Indexes = &getAnalysis<SlotIndexes>();
+ return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+ DeleteContainerSeconds(Cache);
+ Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
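+ // Loop ranges are computed lazily and cached per loop until releaseMemory().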
+ MachineLoopRange *&Range = Cache[Loop];
+ if (!Range)
+ Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+ return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+ MachineLoopRange::Allocator &alloc,
+ SlotIndexes &Indexes)
+ : Loop(loop), Intervals(alloc), Area(0) {
+ // Compute loop coverage.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+ Intervals.insert(Range.first, Range.second, 1u);
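+ // Area accumulates the total slot index distance spanned by the loop blocks.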
+ Area += Range.first.distance(Range.second);
+ }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+ Map::const_iterator I = Intervals.find(Start);
+ return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+ return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ unsigned na = a->getNumber();
+ unsigned nb = b->getNumber();
+ if (na < nb)
+ return -1;
+ if (na > nb)
+ return 1;
+ return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ if (a->getArea() != b->getArea())
+ return a->getArea() > b->getArea() ? -1 : 1;
+ return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+ OS << "Loop#" << getNumber() << " =";
+ for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+ OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+ MLR.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index b647a4d..fadc594 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -29,7 +29,7 @@ using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use TargetData's.
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false);
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -41,30 +41,30 @@ class MMIAddrLabelMapCallbackPtr : CallbackVH {
public:
MMIAddrLabelMapCallbackPtr() : Map(0) {}
MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
-
+
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
}
-
+
void setMap(MMIAddrLabelMap *map) { Map = map; }
-
+
virtual void deleted();
virtual void allUsesReplacedWith(Value *V2);
};
-
+
class MMIAddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
/// Symbols - The symbols for the label. This is a pointer union that is
/// either one symbol (the common case) or a list of symbols.
PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
-
+
Function *Fn; // The containing function of the BasicBlock.
unsigned Index; // The index in BBCallbacks for the BasicBlock.
};
-
+
DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-
+
/// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
/// use this so we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
@@ -76,23 +76,23 @@ class MMIAddrLabelMap {
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
DeletedAddrLabelsNeedingEmission;
public:
-
+
MMIAddrLabelMap(MCContext &context) : Context(context) {}
~MMIAddrLabelMap() {
assert(DeletedAddrLabelsNeedingEmission.empty() &&
"Some labels for deleted blocks never got emitted");
-
+
// Deallocate any of the 'list of symbols' case.
for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
}
-
+
MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
- void takeDeletedSymbolsForFunction(Function *F,
+ void takeDeletedSymbolsForFunction(Function *F,
std::vector<MCSymbol*> &Result);
void UpdateForDeletedBlock(BasicBlock *BB);
@@ -104,7 +104,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
// If we already had an entry for this block, just return it.
if (!Entry.Symbols.isNull()) {
assert(BB->getParent() == Entry.Fn && "Parent changed");
@@ -112,7 +112,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
return Entry.Symbols.get<MCSymbol*>();
return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
}
-
+
// Otherwise, this is a new entry, create a new symbol for it and add an
// entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
BBCallbacks.push_back(BB);
@@ -129,9 +129,9 @@ MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
std::vector<MCSymbol*> Result;
-
+
// If we already had an entry for this block, just return it.
if (Entry.Symbols.isNull())
Result.push_back(getAddrLabelSymbol(BB));
@@ -152,7 +152,7 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
// If there are no entries for the function, just return.
if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
+
// Otherwise, take the list.
std::swap(Result, I->second);
DeletedAddrLabelsNeedingEmission.erase(I);
@@ -175,7 +175,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
if (Sym->isDefined())
return;
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -187,7 +187,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
MCSymbol *Sym = (*Syms)[i];
if (Sym->isDefined()) continue; // Ignore already emitted labels.
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -195,7 +195,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// 'Entry'.
DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
}
-
+
// The entry is deleted, free the memory associated with the symbol list.
delete Syms;
}
@@ -225,7 +225,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
SymList->push_back(PrevSym);
NewEntry.Symbols = SymList;
}
-
+
std::vector<MCSymbol*> *SymList =
NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
@@ -234,7 +234,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
SymList->push_back(Sym);
return;
}
-
+
// Otherwise, concatenate the list.
std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
SymList->insert(SymList->end(), Syms->begin(), Syms->end());
@@ -253,10 +253,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(ID), Context(MAI),
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
ObjFileMMI(0),
- CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
+ CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
AddrLabelSymbols = 0;
@@ -264,7 +267,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -272,7 +275,7 @@ MachineModuleInfo::MachineModuleInfo()
MachineModuleInfo::~MachineModuleInfo() {
delete ObjFileMMI;
-
+
// FIXME: Why isn't doFinalization being called??
//assert(AddrLabelSymbols == 0 && "doFinalization not called");
delete AddrLabelSymbols;
@@ -472,7 +475,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
(LPMap && (*LPMap)[BeginLabel] != 0)) &&
(EndLabel->isDefined() ||
(LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
+
LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
--j, --e;
@@ -562,20 +565,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
// in the zero index.
return 0;
}
-
-namespace {
- /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
- /// by source location, to avoid depending on the arbitrary order that
- /// instruction selection visits variables in.
- struct VariableDebugSorter {
- bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
- const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
- const {
- if (A.second.second.getLine() != B.second.second.getLine())
- return A.second.second.getLine() < B.second.second.getLine();
- if (A.second.second.getCol() != B.second.second.getCol())
- return A.second.second.getCol() < B.second.second.getCol();
- return false;
- }
- };
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 5d852f2..b3fb337 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -30,8 +30,9 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
- assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
@@ -44,20 +45,32 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
- unsigned VR = Reg;
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- assert(Reg < VRegInfo.size() && "Invalid vreg!");
const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
VRegInfo[Reg].first = RC;
// Remove from old register class's vregs list. This may be slow but
// fortunately this operation is rarely needed.
std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
+ std::vector<unsigned>::iterator I =
+ std::find(VRegs.begin(), VRegs.end(), Reg);
VRegs.erase(I);
// Add to new register class's vregs list.
- RegClass2VRegMap[RC->getID()].push_back(VR);
+ RegClass2VRegMap[RC->getID()].push_back(Reg);
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+ if (!NewRC)
+ return 0;
+ if (NewRC != OldRC)
+ setRegClass(Reg, NewRC);
+ return NewRC;
}
/// createVirtualRegister - Create and return a new virtual register in the
@@ -66,17 +79,22 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
unsigned
MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
assert(RegClass && "Cannot create register without RegClass!");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
// Add a reg, but keep track of whether the vector reallocated or not.
- void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
- VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
- RegAllocHints.push_back(std::make_pair(0, 0));
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+ void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
- if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
// The vector reallocated, handle this now.
HandleVRegListReallocation();
- unsigned VR = getLastVirtReg();
- RegClass2VRegMap[RegClass->getID()].push_back(VR);
- return VR;
+ RegClass2VRegMap[RegClass->getID()].push_back(Reg);
+ return Reg;
}
/// HandleVRegListReallocation - We just added a virtual register to the
@@ -85,11 +103,12 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
void MachineRegisterInfo::HandleVRegListReallocation() {
// The back pointers for the vreg lists point into the previous vector.
// Update them to point to their correct slots.
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
- MachineOperand *List = VRegInfo[i].second;
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ MachineOperand *List = VRegInfo[Reg].second;
if (!List) continue;
// Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[i].second;
+ List->Contents.Reg.Prev = &VRegInfo[Reg].second;
}
}
@@ -112,8 +131,6 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
- assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
- "Invalid vreg!");
// Since we are in SSA form, we can use the first definition.
if (!def_empty(Reg))
return &*def_begin(Reg);
@@ -193,8 +210,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
LiveIns.erase(LiveIns.begin() + i);
--i; --e;
} else {
+ DebugLoc DL;
+ // If there is a location for this live in then use it.
+ DenseMap<unsigned, DebugLoc>::iterator DLI =
+ LiveInLocs.find(LiveIns[i].second);
+ if (DLI != LiveInLocs.end())
+ DL = DLI->second;
+
// Emit a copy.
- BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ BuildMI(*EntryMBB, EntryMBB->begin(), DL,
TII.get(TargetOpcode::COPY), LiveIns[i].second)
.addReg(LiveIns[i].first);
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index c8f8faf..8a93a24 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -34,27 +35,31 @@ using namespace llvm;
static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
- cl::init(false), cl::Hidden);
-static cl::opt<unsigned>
-SplitLimit("split-limit",
- cl::init(~0u), cl::Hidden);
+ cl::init(true), cl::Hidden);
-STATISTIC(NumSunk, "Number of machine instructions sunk");
-STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
namespace {
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *RegInfo; // Machine register information
+ MachineRegisterInfo *MRI; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
BitVector AllocatableSet; // Which physregs are allocatable?
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
public:
static char ID; // Pass identification
- MachineSinking() : MachineFunctionPass(ID) {}
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -67,43 +72,125 @@ namespace {
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
}
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
private:
bool ProcessBlock(MachineBasicBlock &MBB);
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From,
- MachineBasicBlock *To);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB, bool &LocalUse) const;
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
-INITIALIZE_PASS(MachineSinking, "machine-sink",
- "Machine code sinking", false, false);
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
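+ // The copy's source and destination must be in the same register class so
+ // that replaceRegWith below is safe.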
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &LocalUse) const {
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
for (MachineRegisterInfo::use_nodbg_iterator
- I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
I != E; ++I) {
- // Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
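+ // For a machine PHI, a register use at operand n is paired with the
+ // predecessor block it flows from at operand n+1.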
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
if (UseInst->isPHI()) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
@@ -127,7 +214,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
@@ -139,6 +226,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
// Process all basic blocks.
+ CEBCandidates.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
@@ -177,6 +265,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (MI->isDebugValue())
continue;
+ if (PerformTrivialForwardCoalescing(MI, &MBB))
+ continue;
+
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
@@ -186,51 +277,92 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
return MadeChange;
}
-MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB) {
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ return true;
+
+ // MI is cheap, so we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk, then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
// Avoid breaking a back edge. From == To means a backedge in a single-BB loop.
- if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB)
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
return 0;
- // Check for more "complex" loops.
- if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) ||
- !LI->isLoopHeader(ToBB)) {
- // It's not always legal to break critical edges and sink the computation
- // to the edge.
- //
- // BB#1:
- // v1024
- // Beq BB#3
- // <fallthrough>
- // BB#2:
- // ... no uses of v1024
- // <fallthrough>
- // BB#3:
- // ...
- // = v1024
- //
- // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
- //
- // BB#1:
- // ...
- // Bne BB#2
- // BB#4:
- // v1024 =
- // B BB#3
- // BB#2:
- // ... no uses of v1024
- // <fallthrough>
- // BB#3:
- // ...
- // = v1024
- //
- // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
- // flow. We need to ensure the new basic block where the computation is
- // sunk to dominates all the uses.
- // It's only legal to break critical edge and sink the computation to the
- // new block if all the predecessors of "To", except for "From", are
- // not dominated by "From". Given SSA property, this means these
- // predecessors are dominated by "To".
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
E = ToBB->pred_end(); PI != E; ++PI) {
if (*PI == FromBB)
@@ -238,17 +370,23 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
if (!DT->dominates(ToBB, *PI))
return 0;
}
-
- // FIXME: Determine if it's cost effective to break this edge.
- return FromBB->SplitCriticalEdge(ToBB, this);
}
- return 0;
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, or reg_sequence. These are meant
+ // to be kept close to their source values to make coalescing easier.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
// Check if it's safe to move the instruction.
if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
@@ -269,6 +407,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
+ bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -281,7 +420,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
@@ -290,7 +429,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
@@ -305,7 +444,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (MO.isUse()) continue;
// If it's not safe to move defs of the register class, then abort.
- if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
return false;
// FIXME: This picks a successor to sink into based on having one
@@ -327,7 +466,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse))
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ BreakPHIEdge, LocalUse))
return false;
continue;
@@ -338,7 +478,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
E = ParentBlock->succ_end(); SI != E; ++SI) {
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) {
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ BreakPHIEdge, LocalUse)) {
SuccToSinkTo = *SI;
break;
}
@@ -384,7 +525,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
- // FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
@@ -412,10 +552,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (!TryBreak)
DEBUG(dbgs() << "Sinking along critical edge.\n");
else {
- MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
if (!NewSucc) {
- DEBUG(dbgs() <<
- " *** PUNTING: Not legal or profitable to break critical edge\n");
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
return false;
} else {
DEBUG(dbgs() << " *** Splitting critical edge:"
@@ -424,10 +565,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
<< " -- BB#" << SuccToSinkTo->getNumber() << '\n');
SuccToSinkTo = NewSucc;
++NumSplit;
+ BreakPHIEdge = false;
}
}
}
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunk into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
// Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 1e88562..7351119 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -26,6 +26,7 @@
#include "llvm/Function.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -45,14 +46,16 @@ using namespace llvm;
namespace {
struct MachineVerifier {
- MachineVerifier(Pass *pass) :
+ MachineVerifier(Pass *pass, const char *b) :
PASS(pass),
+ Banner(b),
OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
+ const char *Banner;
const char *const OutFileName;
raw_ostream *OS;
const MachineFunction *MF;
@@ -71,6 +74,8 @@ namespace {
RegVector regsDefined, regsDead, regsKilled;
RegSet regsLiveInButUnused;
+ SlotIndex lastIndex;
+
// Add Reg and any sub-registers to RV
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
@@ -167,7 +172,9 @@ namespace {
// Analysis information if available
LiveVariables *LiveVars;
- const LiveIntervals *LiveInts;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -193,9 +200,12 @@ namespace {
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
- MachineVerifierPass()
- : MachineFunctionPass(ID) {}
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -203,7 +213,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) {
- MF.verify(this);
+ MF.verify(this, Banner);
return false;
}
};
@@ -212,14 +222,15 @@ namespace {
char MachineVerifierPass::ID = 0;
INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
- "Verify generated machine code", false, false);
+ "Verify generated machine code", false, false)
-FunctionPass *llvm::createMachineVerifierPass() {
- return new MachineVerifierPass();
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
}
-void MachineFunction::verify(Pass *p) const {
- MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -247,11 +258,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
LiveVars = NULL;
LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
if (PASS) {
LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
// We don't want to verify LiveVariables if LiveIntervals is available.
if (!LiveInts)
LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
}
visitMachineFunctionBefore();
@@ -260,6 +275,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBasicBlockBefore(MFI);
for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -288,8 +308,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
*OS << '\n';
- if (!foundErrors++)
- MF->print(*OS);
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
*OS << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getFunction()->getNameStr() << "\n";
}
@@ -299,13 +322,19 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
report(msg, MBB->getParent());
*OS << "- basic block: " << MBB->getName()
<< " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")\n";
+ << " (BB#" << MBB->getNumber() << ")";
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
}
void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
*OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
MI->print(*OS, TM);
}
@@ -329,6 +358,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
}
void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
regsReserved = TRI->getReservedRegs(*MF);
// A sub-register of a reserved register is also reserved
@@ -357,6 +387,16 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ }
+ if (LandingPadSuccs.size() > 1)
+ report("MBB has more than one landing pad successor", MBB);
+
// Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
@@ -372,14 +412,14 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// It's possible that the block legitimately ends with a noreturn
// call or an unreachable, in which case it won't actually fall
// out the bottom of the function.
- } else if (MBB->succ_empty()) {
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
// It's possible that the block legitimately ends with a noreturn
// call or an unreachable, in which case it won't actually fall
// out of the block.
- } else if (MBB->succ_size() != 1) {
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (MBB->succ_begin()[0] != MBBI) {
+ } else if (!MBB->isSuccessor(MBBI)) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
@@ -394,10 +434,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && Cond.empty()) {
// Block unconditionally branches somewhere.
- if (MBB->succ_size() != 1) {
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional branch but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (MBB->succ_begin()[0] != TBB) {
+ } else if (!MBB->isSuccessor(TBB)) {
report("MBB exits via unconditional branch but the CFG "
"successor doesn't match the actual successor!", MBB);
}
@@ -487,6 +527,9 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsKilled.clear();
regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
}
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
@@ -525,6 +568,7 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const TargetInstrDesc &TI = MI->getDesc();
+ const TargetOperandInfo &TOI = TI.OpInfo[MONum];
// The first TI.NumDefs operands must be explicit register defines
if (MONum < TI.getNumDefs()) {
@@ -535,9 +579,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < TI.getNumOperands()) {
- if (MO->isReg()) {
- if (MO->isDef())
- report("Explicit operand marked as def", MO, MONum);
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the ARM back end.
+ if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
+ if (MO->isDef() && !TOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
}
@@ -554,7 +600,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
// Check Live Variables.
- if (MO->isUndef()) {
+ if (MI->isDebugValue()) {
+ // Liveness checks are not valid for debug values.
+ } else if (MO->isUndef()) {
// An <undef> doesn't refer to any register, so just skip it.
} else if (MO->isUse()) {
regsLiveInButUnused.erase(Reg);
@@ -566,7 +614,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
unsigned DefReg = MI->getOperand(defIdx).getReg();
if (Reg == DefReg) {
isKill = true;
- // ANd in that case an explicit kill flag is not allowed.
+ // And in that case an explicit kill flag is not allowed.
if (MO->isKill())
report("Illegal kill flag on two-address instruction operand",
MO, MONum);
@@ -590,7 +638,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
@@ -598,8 +647,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("No live range at use", MO, MONum);
*OS << UseIdx << " is not live in " << LI << '\n';
}
- // TODO: Verify isKill == LI.killedAt.
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -636,11 +690,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) {
- assert(LR->valno && "NULL valno is not allowed");
- if (LR->valno->def != DefIdx) {
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << LR->valno->id << " is not defined at "
+ *OS << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
}
} else {
@@ -655,7 +709,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Check register classes.
if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
- const TargetOperandInfo &TOI = TI.OpInfo[MONum];
unsigned SubIdx = MO->getSubReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -706,6 +759,22 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("PHI operand is not in the CFG", MO, MONum);
break;
+ case MachineOperand::MO_FrameIndex:
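+ // If the frame index is a spill slot with a live stack interval, any load
+ // or store of the slot must occur while the slot is live.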
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
default:
break;
}
@@ -717,12 +786,31 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
set_subtract(regsLive, regsKilled); regsKilled.clear();
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
+
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
}
void
MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
MBBInfoMap[MBB].regsLiveOut = regsLive;
regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
}
// Calculate the largest possible vregsPassed sets. These are the registers that
@@ -854,8 +942,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
- for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
- RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
@@ -865,13 +953,13 @@ void MachineVerifier::verifyLiveVariables() {
if (MInfo.vregsRequired.count(Reg)) {
if (!VI.AliveBlocks.test(MFI->getNumber())) {
report("LiveVariables: Block missing from AliveBlocks", MFI);
- *OS << "Virtual register %reg" << Reg
+ *OS << "Virtual register " << PrintReg(Reg)
<< " must be live through the block.\n";
}
} else {
if (VI.AliveBlocks.test(MFI->getNumber())) {
report("LiveVariables: Block should not be in AliveBlocks", MFI);
- *OS << "Virtual register %reg" << Reg
+ *OS << "Virtual register " << PrintReg(Reg)
<< " is not needed live through the block.\n";
}
}
@@ -884,14 +972,24 @@ void MachineVerifier::verifyLiveIntervals() {
for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
LVE = LiveInts->end(); LVI != LVE; ++LVI) {
const LiveInterval &LI = *LVI->second;
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->use_empty(LI.reg))
+ continue;
+
+ // Physical registers have much weirdness going on, mostly from coalescing.
+ // We should probably fix it, but for now just ignore them.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ continue;
+
assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I!=E; ++I) {
VNInfo *VNI = *I;
- const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def);
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (!DefLR) {
+ if (!DefVNI) {
if (!VNI->isUnused()) {
report("Valno not live at def and not marked unused", MF);
*OS << "Valno #" << VNI->id << " in " << LI << '\n';
@@ -902,31 +1000,216 @@ void MachineVerifier::verifyLiveIntervals() {
if (VNI->isUnused())
continue;
- if (DefLR->valno != VNI) {
+ if (DefVNI != VNI) {
report("Live range at def has different valno", MF);
- DefLR->print(*OS);
- *OS << " should use valno #" << VNI->id << " in " << LI << '\n';
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+ continue;
}
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ continue;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber()
+ << " in " << LI << '\n';
+ }
+ } else {
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ bool isEarlyClobber = false;
+ if (MI) {
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+ MOI->isEarlyClobber()) {
+ isEarlyClobber = true;
+ break;
+ }
+ }
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isUse()) {
+ report("Early clobber def must be at a USE slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ } else if (!VNI->def.isDef()) {
+ report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ }
}
for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const LiveRange &LR = *I;
- assert(LR.valno && "Live range has no valno");
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
- if (LR.valno->id >= LI.getNumValNums() ||
- LR.valno != LI.getValNumInfo(LR.valno->id)) {
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
report("Foreign valno in live range", MF);
- LR.print(*OS);
+ I->print(*OS);
*OS << " has a valno not in " << LI << '\n';
}
- if (LR.valno->isUnused()) {
+ if (VNI->isUnused()) {
report("Live range valno is marked unused", MF);
- LR.print(*OS);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ I->print(*OS);
*OS << " in " << LI << '\n';
+ continue;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ !MI->readsVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ // FIXME: Should we check for each of these?
+ bool hasDeadDef = false;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
+ hasDeadDef = true;
+ break;
+ }
+ }
+
+ if (!hasDeadDef) {
+ report("Instruction killing live segment neither defines nor reads "
+ "register", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ continue;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+ const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
+ if (PVNI && !PVNI->hasPHIKill()) {
+ report("Value live out of predecessor doesn't have PHIKill", MF);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << " doesn't have PHIKill, but Valno #" << VNI->id
+ << " is PHIDef and defined at the beginning of BB#"
+ << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
+ << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << PEnd << " in " << LI << '\n';
+ continue;
+ }
+
+ if (PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
}
+ }
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ *OS << NumComp << " components in " << LI << '\n';
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
}
}
}
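
The Indexes checks added to visitMachineInstrAfter and visitMachineBasicBlockAfter above boil down to one monotonicity invariant: instruction indexes inside a block must strictly increase, and the block's end index must come after the last instruction. A minimal standalone sketch of that invariant, in plain C++ with a hypothetical SlotIndexStub stand-in (not the LLVM SlotIndex class):

// Sketch only: models the ordering rule the verifier reports on above.
#include <cassert>
#include <cstdio>
#include <vector>

struct SlotIndexStub { unsigned Value; };   // hypothetical stand-in for SlotIndex

static bool checkBlockOrdering(const std::vector<SlotIndexStub> &InstrIdx,
                               SlotIndexStub BlockEnd) {
  unsigned Last = 0;                        // index "before" the first instruction
  for (const SlotIndexStub &Idx : InstrIdx) {
    if (!(Idx.Value > Last)) {
      std::printf("Instruction index out of order (last was %u)\n", Last);
      return false;
    }
    Last = Idx.Value;
  }
  if (!(BlockEnd.Value > Last)) {
    std::printf("Block ends before last instruction index\n");
    return false;
  }
  return true;
}

int main() {
  assert(checkBlockOrdering({{4}, {8}, {12}}, {16}));
  assert(!checkBlockOrdering({{4}, {8}, {8}}, {16}));  // repeated index is rejected
  return 0;
}
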
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index edb4eea..c05be13 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,9 @@ namespace {
public:
static char ID; // Pass identification
- OptimizePHIs() : MachineFunctionPass(ID) {}
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -55,7 +57,7 @@ namespace {
char OptimizePHIs::ID = 0;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
- "Optimize machine instruction PHIs", false, false);
+ "Optimize machine instruction PHIs", false, false)
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
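
The OptimizePHIs hunk above switches the pass to explicit registration: the constructor now calls initializeOptimizePHIsPass on the global PassRegistry, and INITIALIZE_PASS loses its trailing semicolon. A rough standalone sketch of the run-once-registration shape, in plain C++ with hypothetical names (not the LLVM PassRegistry API):

// Sketch only: registration is triggered from the constructor and runs once.
#include <iostream>
#include <mutex>

static std::once_flag RegisterOnce;

static void initializeOptPhisSketch() {      // hypothetical registration hook
  std::cout << "opt-phis registered\n";
}

struct OptPhisSketch {                       // hypothetical pass stand-in
  OptPhisSketch() { std::call_once(RegisterOnce, initializeOptPhisSketch); }
};

int main() {
  OptPhisSketch A, B;   // the registration message is printed only once
  return 0;
}
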
diff --git a/contrib/llvm/lib/CodeGen/PBQP/Graph.h b/contrib/llvm/lib/CodeGen/PBQP/Graph.h
deleted file mode 100644
index b2224cb..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/Graph.h
+++ /dev/null
@@ -1,425 +0,0 @@
-//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Graph class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
-#define LLVM_CODEGEN_PBQP_GRAPH_H
-
-#include "Math.h"
-
-#include <list>
-#include <vector>
-#include <map>
-
-namespace PBQP {
-
- /// PBQP Graph class.
- /// Instances of this class describe PBQP problems.
- class Graph {
- private:
-
- // ----- TYPEDEFS -----
- class NodeEntry;
- class EdgeEntry;
-
- typedef std::list<NodeEntry> NodeList;
- typedef std::list<EdgeEntry> EdgeList;
-
- public:
-
- typedef NodeList::iterator NodeItr;
- typedef NodeList::const_iterator ConstNodeItr;
-
- typedef EdgeList::iterator EdgeItr;
- typedef EdgeList::const_iterator ConstEdgeItr;
-
- private:
-
- typedef std::list<EdgeItr> AdjEdgeList;
-
- public:
-
- typedef AdjEdgeList::iterator AdjEdgeItr;
-
- private:
-
- class NodeEntry {
- private:
- Vector costs;
- AdjEdgeList adjEdges;
- unsigned degree;
- void *data;
- public:
- NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
- Vector& getCosts() { return costs; }
- const Vector& getCosts() const { return costs; }
- unsigned getDegree() const { return degree; }
- AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
- AdjEdgeItr edgesEnd() { return adjEdges.end(); }
- AdjEdgeItr addEdge(EdgeItr e) {
- ++degree;
- return adjEdges.insert(adjEdges.end(), e);
- }
- void removeEdge(AdjEdgeItr ae) {
- --degree;
- adjEdges.erase(ae);
- }
- void setData(void *data) { this->data = data; }
- void* getData() { return data; }
- };
-
- class EdgeEntry {
- private:
- NodeItr node1, node2;
- Matrix costs;
- AdjEdgeItr node1AEItr, node2AEItr;
- void *data;
- public:
- EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
- : node1(node1), node2(node2), costs(costs) {}
- NodeItr getNode1() const { return node1; }
- NodeItr getNode2() const { return node2; }
- Matrix& getCosts() { return costs; }
- const Matrix& getCosts() const { return costs; }
- void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
- AdjEdgeItr getNode1AEItr() { return node1AEItr; }
- void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
- AdjEdgeItr getNode2AEItr() { return node2AEItr; }
- void setData(void *data) { this->data = data; }
- void *getData() { return data; }
- };
-
- // ----- MEMBERS -----
-
- NodeList nodes;
- unsigned numNodes;
-
- EdgeList edges;
- unsigned numEdges;
-
- // ----- INTERNAL METHODS -----
-
- NodeEntry& getNode(NodeItr nItr) { return *nItr; }
- const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
-
- EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
- const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
-
- NodeItr addConstructedNode(const NodeEntry &n) {
- ++numNodes;
- return nodes.insert(nodes.end(), n);
- }
-
- EdgeItr addConstructedEdge(const EdgeEntry &e) {
- assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
- "Attempt to add duplicate edge.");
- ++numEdges;
- EdgeItr edgeItr = edges.insert(edges.end(), e);
- EdgeEntry &ne = getEdge(edgeItr);
- NodeEntry &n1 = getNode(ne.getNode1());
- NodeEntry &n2 = getNode(ne.getNode2());
- // Sanity check on matrix dimensions:
- assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
- (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
- "Edge cost dimensions do not match node costs dimensions.");
- ne.setNode1AEItr(n1.addEdge(edgeItr));
- ne.setNode2AEItr(n2.addEdge(edgeItr));
- return edgeItr;
- }
-
- inline void copyFrom(const Graph &other);
- public:
-
- /// \brief Construct an empty PBQP graph.
- Graph() : numNodes(0), numEdges(0) {}
-
- /// \brief Copy construct this graph from "other". Note: Does not copy node
- /// and edge data, only graph structure and costs.
- /// @param other Source graph to copy from.
- Graph(const Graph &other) : numNodes(0), numEdges(0) {
- copyFrom(other);
- }
-
- /// \brief Make this graph a copy of "other". Note: Does not copy node and
- /// edge data, only graph structure and costs.
- /// @param other The graph to copy from.
- /// @return A reference to this graph.
- ///
- /// This will clear the current graph, erasing any nodes and edges added,
- /// before copying from other.
- Graph& operator=(const Graph &other) {
- clear();
- copyFrom(other);
- return *this;
- }
-
- /// \brief Add a node with the given costs.
- /// @param costs Cost vector for the new node.
- /// @return Node iterator for the added node.
- NodeItr addNode(const Vector &costs) {
- return addConstructedNode(NodeEntry(costs));
- }
-
- /// \brief Add an edge between the given nodes with the given costs.
- /// @param n1Itr First node.
- /// @param n2Itr Second node.
- /// @return Edge iterator for the added edge.
- EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr,
- const Matrix &costs) {
- assert(getNodeCosts(n1Itr).getLength() == costs.getRows() &&
- getNodeCosts(n2Itr).getLength() == costs.getCols() &&
- "Matrix dimensions mismatch.");
- return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs));
- }
-
- /// \brief Get the number of nodes in the graph.
- /// @return Number of nodes in the graph.
- unsigned getNumNodes() const { return numNodes; }
-
- /// \brief Get the number of edges in the graph.
- /// @return Number of edges in the graph.
- unsigned getNumEdges() const { return numEdges; }
-
- /// \brief Get a node's cost vector.
- /// @param nItr Node iterator.
- /// @return Node cost vector.
- Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); }
-
- /// \brief Get a node's cost vector (const version).
- /// @param nItr Node iterator.
- /// @return Node cost vector.
- const Vector& getNodeCosts(ConstNodeItr nItr) const {
- return getNode(nItr).getCosts();
- }
-
- /// \brief Set a node's data pointer.
- /// @param nItr Node iterator.
- /// @param data Pointer to node data.
- ///
- /// Typically used by a PBQP solver to attach data to aid in solution.
- void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); }
-
- /// \brief Get the node's data pointer.
- /// @param nItr Node iterator.
- /// @return Pointer to node data.
- void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); }
-
- /// \brief Get an edge's cost matrix.
- /// @param eItr Edge iterator.
- /// @return Edge cost matrix.
- Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); }
-
- /// \brief Get an edge's cost matrix (const version).
- /// @param eItr Edge iterator.
- /// @return Edge cost matrix.
- const Matrix& getEdgeCosts(ConstEdgeItr eItr) const {
- return getEdge(eItr).getCosts();
- }
-
- /// \brief Set an edge's data pointer.
- /// @param eItr Edge iterator.
- /// @param data Pointer to edge data.
- ///
- /// Typically used by a PBQP solver to attach data to aid in solution.
- void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); }
-
- /// \brief Get an edge's data pointer.
- /// @param eItr Edge iterator.
- /// @return Pointer to edge data.
- void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); }
-
- /// \brief Get a node's degree.
- /// @param nItr Node iterator.
- /// @return The degree of the node.
- unsigned getNodeDegree(NodeItr nItr) const {
- return getNode(nItr).getDegree();
- }
-
- /// \brief Begin iterator for node set.
- NodeItr nodesBegin() { return nodes.begin(); }
-
- /// \brief Begin const iterator for node set.
- ConstNodeItr nodesBegin() const { return nodes.begin(); }
-
- /// \brief End iterator for node set.
- NodeItr nodesEnd() { return nodes.end(); }
-
- /// \brief End const iterator for node set.
- ConstNodeItr nodesEnd() const { return nodes.end(); }
-
- /// \brief Begin iterator for edge set.
- EdgeItr edgesBegin() { return edges.begin(); }
-
- /// \brief End iterator for edge set.
- EdgeItr edgesEnd() { return edges.end(); }
-
- /// \brief Get begin iterator for adjacent edge set.
- /// @param nItr Node iterator.
- /// @return Begin iterator for the set of edges connected to the given node.
- AdjEdgeItr adjEdgesBegin(NodeItr nItr) {
- return getNode(nItr).edgesBegin();
- }
-
- /// \brief Get end iterator for adjacent edge set.
- /// @param nItr Node iterator.
- /// @return End iterator for the set of edges connected to the given node.
- AdjEdgeItr adjEdgesEnd(NodeItr nItr) {
- return getNode(nItr).edgesEnd();
- }
-
- /// \brief Get the first node connected to this edge.
- /// @param eItr Edge iterator.
- /// @return The first node connected to the given edge.
- NodeItr getEdgeNode1(EdgeItr eItr) {
- return getEdge(eItr).getNode1();
- }
-
- /// \brief Get the second node connected to this edge.
- /// @param eItr Edge iterator.
- /// @return The second node connected to the given edge.
- NodeItr getEdgeNode2(EdgeItr eItr) {
- return getEdge(eItr).getNode2();
- }
-
- /// \brief Get the "other" node connected to this edge.
- /// @param eItr Edge iterator.
- /// @param nItr Node iterator for the "given" node.
- /// @return The iterator for the "other" node connected to this edge.
- NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) {
- EdgeEntry &e = getEdge(eItr);
- if (e.getNode1() == nItr) {
- return e.getNode2();
- } // else
- return e.getNode1();
- }
-
- /// \brief Get the edge connecting two nodes.
- /// @param n1Itr First node iterator.
- /// @param n2Itr Second node iterator.
- /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists,
- /// otherwise returns edgesEnd().
- EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
- for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
- aeItr != aeEnd; ++aeItr) {
- if ((getEdgeNode1(*aeItr) == n2Itr) ||
- (getEdgeNode2(*aeItr) == n2Itr)) {
- return *aeItr;
- }
- }
- return edges.end();
- }
-
- /// \brief Remove a node from the graph.
- /// @param nItr Node iterator.
- void removeNode(NodeItr nItr) {
- NodeEntry &n = getNode(nItr);
- for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
- EdgeItr eItr = *itr;
- ++itr;
- removeEdge(eItr);
- }
- nodes.erase(nItr);
- --numNodes;
- }
-
- /// \brief Remove an edge from the graph.
- /// @param eItr Edge iterator.
- void removeEdge(EdgeItr eItr) {
- EdgeEntry &e = getEdge(eItr);
- NodeEntry &n1 = getNode(e.getNode1());
- NodeEntry &n2 = getNode(e.getNode2());
- n1.removeEdge(e.getNode1AEItr());
- n2.removeEdge(e.getNode2AEItr());
- edges.erase(eItr);
- --numEdges;
- }
-
- /// \brief Remove all nodes and edges from the graph.
- void clear() {
- nodes.clear();
- edges.clear();
- numNodes = numEdges = 0;
- }
-
- /// \brief Print a representation of this graph in DOT format.
- /// @param os Output stream to print on.
- template <typename OStream>
- void printDot(OStream &os) {
-
- os << "graph {\n";
-
- for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
- nodeItr != nodeEnd; ++nodeItr) {
-
- os << " node" << nodeItr << " [ label=\""
- << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
- }
-
- os << " edge [ len=" << getNumNodes() << " ]\n";
-
- for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
- edgeItr != edgeEnd; ++edgeItr) {
-
- os << " node" << getEdgeNode1(edgeItr)
- << " -- node" << getEdgeNode2(edgeItr)
- << " [ label=\"";
-
- const Matrix &edgeCosts = getEdgeCosts(edgeItr);
-
- for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
- os << edgeCosts.getRowAsVector(i) << "\\n";
- }
- os << "\" ]\n";
- }
- os << "}\n";
- }
-
- };
-
- class NodeItrComparator {
- public:
- bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
- return &*n1 < &*n2;
- }
-
- bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
- return &*n1 < &*n2;
- }
- };
-
- class EdgeItrCompartor {
- public:
- bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
- return &*e1 < &*e2;
- }
-
- bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
- return &*e1 < &*e2;
- }
- };
-
- void Graph::copyFrom(const Graph &other) {
- std::map<Graph::ConstNodeItr, Graph::NodeItr,
- NodeItrComparator> nodeMap;
-
- for (Graph::ConstNodeItr nItr = other.nodesBegin(),
- nEnd = other.nodesEnd();
- nItr != nEnd; ++nItr) {
- nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
- }
-
- }
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
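
The deleted Graph.h above stores NodeEntry and EdgeEntry objects in std::list and hands out list iterators (NodeItr, EdgeItr) as node and edge handles. A small standalone sketch of the property that design relies on, assuming nothing beyond the standard library: list iterators stay valid while other elements are inserted or erased.

// Sketch only: handle stability of std::list iterators.
#include <cassert>
#include <list>

int main() {
  std::list<int> Nodes;
  std::list<int>::iterator A = Nodes.insert(Nodes.end(), 1);
  std::list<int>::iterator B = Nodes.insert(Nodes.end(), 2);
  Nodes.insert(Nodes.end(), 3);   // adding more nodes...
  Nodes.erase(B);                 // ...or removing another one...
  assert(*A == 1);                // ...does not invalidate handle A
  return 0;
}
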
diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h
deleted file mode 100644
index 791c227..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/HeuristicBase.h
+++ /dev/null
@@ -1,246 +0,0 @@
-//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-
-#include "HeuristicSolver.h"
-
-namespace PBQP {
-
- /// \brief Abstract base class for heuristic implementations.
- ///
- /// This class provides a handy base for heuristic implementations with common
- /// solver behaviour implemented for a number of methods.
- ///
- /// To implement your own heuristic using this class as a base you'll have to
- /// implement, as a minimum, the following methods:
- /// <ul>
- /// <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
- /// heuristic reduction list.
- /// <li> void heuristicReduce() : Perform a single heuristic reduction.
- /// <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
- /// change to the cost matrix on the given edge (by R2).
- /// <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new
- /// costs on the given edge.
- /// <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
- /// edge into the PBQP graph (by R2).
- /// <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
- /// disconnection of the given edge from the given node.
- /// <li> A constructor for your derived class : to pass back a reference to
- /// the solver which is using this heuristic.
- /// </ul>
- ///
- /// These methods are implemented in this class for documentation purposes,
- /// but will assert if called.
- ///
- /// Note that this class uses the curiously recursive template idiom to
- /// forward calls to the derived class. These methods need not be made
- /// virtual, and indeed probably shouldn't for performance reasons.
- ///
- /// You'll also need to provide NodeData and EdgeData structs in your class.
- /// These can be used to attach data relevant to your heuristic to each
- /// node/edge in the PBQP graph.
-
- template <typename HImpl>
- class HeuristicBase {
- private:
-
- typedef std::list<Graph::NodeItr> OptimalList;
-
- HeuristicSolverImpl<HImpl> &s;
- Graph &g;
- OptimalList optimalList;
-
- // Return a reference to the derived heuristic.
- HImpl& impl() { return static_cast<HImpl&>(*this); }
-
- // Add the given node to the optimal reductions list. Keep an iterator to
- // its location for fast removal.
- void addToOptimalReductionList(Graph::NodeItr nItr) {
- optimalList.insert(optimalList.end(), nItr);
- }
-
- public:
-
- /// \brief Construct an instance with a reference to the given solver.
- /// @param solver The solver which is using this heuristic instance.
- HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
- : s(solver), g(s.getGraph()) { }
-
- /// \brief Get the solver which is using this heuristic instance.
- /// @return The solver which is using this heuristic instance.
- ///
- /// You can use this method to get access to the solver in your derived
- /// heuristic implementation.
- HeuristicSolverImpl<HImpl>& getSolver() { return s; }
-
- /// \brief Get the graph representing the problem to be solved.
- /// @return The graph representing the problem to be solved.
- Graph& getGraph() { return g; }
-
- /// \brief Tell the solver to simplify the graph before the reduction phase.
- /// @return Whether or not the solver should run a simplification phase
- /// prior to the main setup and reduction.
- ///
- /// HeuristicBase returns true from this method as it's a sensible default,
- /// however you can over-ride it in your derived class if you want different
- /// behaviour.
- bool solverRunSimplify() const { return true; }
-
- /// \brief Decide whether a node should be optimally or heuristically
- /// reduced.
- /// @return Whether or not the given node should be listed for optimal
- /// reduction (via R0, R1 or R2).
- ///
- /// HeuristicBase returns true for any node with degree less than 3. This is
- /// sane and sensible for many situations, but not all. You can over-ride
- /// this method in your derived class if you want a different selection
- /// criteria. Note however that your criteria for selecting optimal nodes
- /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or
- /// higher should not be selected under any circumstances.
- bool shouldOptimallyReduce(Graph::NodeItr nItr) {
- if (g.getNodeDegree(nItr) < 3)
- return true;
- // else
- return false;
- }
-
- /// \brief Add the given node to the list of nodes to be optimally reduced.
- /// @return nItr Node iterator to be added.
- ///
- /// You probably don't want to over-ride this, except perhaps to record
- /// statistics before calling this implementation. HeuristicBase relies on
- /// its behaviour.
- void addToOptimalReduceList(Graph::NodeItr nItr) {
- optimalList.push_back(nItr);
- }
-
- /// \brief Initialise the heuristic.
- ///
- /// HeuristicBase iterates over all nodes in the problem and adds them to
- /// the appropriate list using addToOptimalReduceList or
- /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
- ///
- /// This behaviour should be fine for most situations.
- void setup() {
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
- if (impl().shouldOptimallyReduce(nItr)) {
- addToOptimalReduceList(nItr);
- } else {
- impl().addToHeuristicReduceList(nItr);
- }
- }
- }
-
- /// \brief Optimally reduce one of the nodes in the optimal reduce list.
- /// @return True if a reduction takes place, false if the optimal reduce
- /// list is empty.
- ///
- /// Selects a node from the optimal reduce list and removes it, applying
- /// R0, R1 or R2 as appropriate based on the selected node's degree.
- bool optimalReduce() {
- if (optimalList.empty())
- return false;
-
- Graph::NodeItr nItr = optimalList.front();
- optimalList.pop_front();
-
- switch (s.getSolverDegree(nItr)) {
- case 0: s.applyR0(nItr); break;
- case 1: s.applyR1(nItr); break;
- case 2: s.applyR2(nItr); break;
- default: assert(false &&
- "Optimal reductions of degree > 2 nodes is invalid.");
- }
-
- return true;
- }
-
- /// \brief Perform the PBQP reduction process.
- ///
- /// Reduces the problem to the empty graph by repeated application of the
- /// reduction rules R0, R1, R2 and RN.
- /// R0, R1 or R2 are always applied if possible before RN is used.
- void reduce() {
- bool finished = false;
-
- while (!finished) {
- if (!optimalReduce()) {
- if (impl().heuristicReduce()) {
- getSolver().recordRN();
- } else {
- finished = true;
- }
- }
- }
- }
-
- /// \brief Add a node to the heuristic reduce list.
- /// @param nItr Node iterator to add to the heuristic reduce list.
- void addToHeuristicList(Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Heuristically reduce one of the nodes in the heuristic
- /// reduce list.
- /// @return True if a reduction takes place, false if the heuristic reduce
- /// list is empty.
- void heuristicReduce() {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Prepare a change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle the change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void postUpdateEdgeCostts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle the addition of a new edge into the PBQP graph.
- /// @param eItr Edge iterator for the added edge.
- void handleAddEdge(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle disconnection of an edge from a node.
- /// @param eItr Edge iterator for edge being disconnected.
- /// @param nItr Node iterator for the node being disconnected from.
- ///
- /// Edges are frequently removed due to the removal of a node. This
- /// method allows for the effect to be computed only for the remaining
- /// node in the graph.
- void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Clean up any structures used by HeuristicBase.
- ///
- /// At present this just performs a sanity check: that the optimal reduce
- /// list is empty now that reduction has completed.
- ///
- /// If your derived class has more complex structures which need tearing
- /// down you should over-ride this method but include a call back to this
- /// implementation.
- void cleanup() {
- assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
- }
-
- };
-
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
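
As its own comments note, the deleted HeuristicBase forwards calls to the derived heuristic through the curiously recurring template pattern instead of virtual dispatch. A minimal standalone sketch of that idiom, in plain C++ with hypothetical class names:

// Sketch only: the base resolves the derived hook at compile time via static_cast.
#include <iostream>

template <typename Impl>
class HeuristicSketchBase {
  Impl &impl() { return static_cast<Impl &>(*this); }
public:
  void setup() { impl().addToHeuristicList(); }   // forwarded, no virtual call
};

class BriggsSketch : public HeuristicSketchBase<BriggsSketch> {
public:
  void addToHeuristicList() { std::cout << "derived hook called\n"; }
};

int main() {
  BriggsSketch H;
  H.setup();   // prints "derived hook called"
  return 0;
}
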
diff --git a/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h b/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h
deleted file mode 100644
index 35514f9..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/HeuristicSolver.h
+++ /dev/null
@@ -1,616 +0,0 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Heuristic PBQP solver. This solver is able to perform optimal reductions for
-// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is
-// used to select a node for reduction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-
-#include "Graph.h"
-#include "Solution.h"
-#include <vector>
-#include <limits>
-
-namespace PBQP {
-
- /// \brief Heuristic PBQP solver implementation.
- ///
- /// This class should usually be created (and destroyed) indirectly via a call
- /// to HeuristicSolver<HImpl>::solve(Graph&).
- /// See the comments for HeuristicSolver.
- ///
- /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
- /// backpropagation phase, and maintains the internal copy of the graph on
- /// which the reduction is carried out (the original being kept to facilitate
- /// backpropagation).
- template <typename HImpl>
- class HeuristicSolverImpl {
- private:
-
- typedef typename HImpl::NodeData HeuristicNodeData;
- typedef typename HImpl::EdgeData HeuristicEdgeData;
-
- typedef std::list<Graph::EdgeItr> SolverEdges;
-
- public:
-
- /// \brief Iterator type for edges in the solver graph.
- typedef SolverEdges::iterator SolverEdgeItr;
-
- private:
-
- class NodeData {
- public:
- NodeData() : solverDegree(0) {}
-
- HeuristicNodeData& getHeuristicData() { return hData; }
-
- SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
- ++solverDegree;
- return solverEdges.insert(solverEdges.end(), eItr);
- }
-
- void removeSolverEdge(SolverEdgeItr seItr) {
- --solverDegree;
- solverEdges.erase(seItr);
- }
-
- SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
- SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
- unsigned getSolverDegree() const { return solverDegree; }
- void clearSolverEdges() {
- solverDegree = 0;
- solverEdges.clear();
- }
-
- private:
- HeuristicNodeData hData;
- unsigned solverDegree;
- SolverEdges solverEdges;
- };
-
- class EdgeData {
- public:
- HeuristicEdgeData& getHeuristicData() { return hData; }
-
- void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
- this->n1SolverEdgeItr = n1SolverEdgeItr;
- }
-
- SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
-
- void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){
- this->n2SolverEdgeItr = n2SolverEdgeItr;
- }
-
- SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
-
- private:
-
- HeuristicEdgeData hData;
- SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
- };
-
- Graph &g;
- HImpl h;
- Solution s;
- std::vector<Graph::NodeItr> stack;
-
- typedef std::list<NodeData> NodeDataList;
- NodeDataList nodeDataList;
-
- typedef std::list<EdgeData> EdgeDataList;
- EdgeDataList edgeDataList;
-
- public:
-
- /// \brief Construct a heuristic solver implementation to solve the given
- /// graph.
- /// @param g The graph representing the problem instance to be solved.
- HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
-
- /// \brief Get the graph being solved by this solver.
- /// @return The graph representing the problem instance being solved by this
- /// solver.
- Graph& getGraph() { return g; }
-
- /// \brief Get the heuristic data attached to the given node.
- /// @param nItr Node iterator.
- /// @return The heuristic data attached to the given node.
- HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).getHeuristicData();
- }
-
- /// \brief Get the heuristic data attached to the given edge.
- /// @param eItr Edge iterator.
- /// @return The heuristic data attached to the given node.
- HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
- return getSolverEdgeData(eItr).getHeuristicData();
- }
-
- /// \brief Begin iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- /// @param nItr Node iterator.
- /// @return Begin iterator for the set of edges adjacent to the given node
- /// in the solver graph.
- SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).solverEdgesBegin();
- }
-
- /// \brief End iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- /// @param nItr Node iterator.
- /// @return End iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).solverEdgesEnd();
- }
-
- /// \brief Remove a node from the solver graph.
- /// @param eItr Edge iterator for edge to be removed.
- ///
- /// Does <i>not</i> notify the heuristic of the removal. That should be
- /// done manually if necessary.
- void removeSolverEdge(Graph::EdgeItr eItr) {
- EdgeData &eData = getSolverEdgeData(eItr);
- NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
- &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
- n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
- n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
- }
-
- /// \brief Compute a solution to the PBQP problem instance with which this
- /// heuristic solver was constructed.
- /// @return A solution to the PBQP problem.
- ///
- /// Performs the full PBQP heuristic solver algorithm, including setup,
- /// calls to the heuristic (which will call back to the reduction rules in
- /// this class), and cleanup.
- Solution computeSolution() {
- setup();
- h.setup();
- h.reduce();
- backpropagate();
- h.cleanup();
- cleanup();
- return s;
- }
-
- /// \brief Add to the end of the stack.
- /// @param nItr Node iterator to add to the reduction stack.
- void pushToStack(Graph::NodeItr nItr) {
- getSolverNodeData(nItr).clearSolverEdges();
- stack.push_back(nItr);
- }
-
- /// \brief Returns the solver degree of the given node.
- /// @param nItr Node iterator for which degree is requested.
- /// @return Node degree in the <i>solver</i> graph (not the original graph).
- unsigned getSolverDegree(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).getSolverDegree();
- }
-
- /// \brief Set the solution of the given node.
- /// @param nItr Node iterator to set solution for.
- /// @param selection Selection for node.
- void setSolution(const Graph::NodeItr &nItr, unsigned selection) {
- s.setSelection(nItr, selection);
-
- for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
- aeEnd = g.adjEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
- Graph::EdgeItr eItr(*aeItr);
- Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr));
- getSolverNodeData(anItr).addSolverEdge(eItr);
- }
- }
-
- /// \brief Apply rule R0.
- /// @param nItr Node iterator for node to apply R0 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR0(Graph::NodeItr nItr) {
- assert(getSolverNodeData(nItr).getSolverDegree() == 0 &&
- "R0 applied to node with degree != 0.");
-
- // Nothing to do. Just push the node onto the reduction stack.
- pushToStack(nItr);
-
- s.recordR0();
- }
-
- /// \brief Apply rule R1.
- /// @param xnItr Node iterator for node to apply R1 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR1(Graph::NodeItr xnItr) {
- NodeData &nd = getSolverNodeData(xnItr);
- assert(nd.getSolverDegree() == 1 &&
- "R1 applied to node with degree != 1.");
-
- Graph::EdgeItr eItr = *nd.solverEdgesBegin();
-
- const Matrix &eCosts = g.getEdgeCosts(eItr);
- const Vector &xCosts = g.getNodeCosts(xnItr);
-
- // Duplicate a little to avoid transposing matrices.
- if (xnItr == g.getEdgeNode1(eItr)) {
- Graph::NodeItr ynItr = g.getEdgeNode2(eItr);
- Vector &yCosts = g.getNodeCosts(ynItr);
- for (unsigned j = 0; j < yCosts.getLength(); ++j) {
- PBQPNum min = eCosts[0][j] + xCosts[0];
- for (unsigned i = 1; i < xCosts.getLength(); ++i) {
- PBQPNum c = eCosts[i][j] + xCosts[i];
- if (c < min)
- min = c;
- }
- yCosts[j] += min;
- }
- h.handleRemoveEdge(eItr, ynItr);
- } else {
- Graph::NodeItr ynItr = g.getEdgeNode1(eItr);
- Vector &yCosts = g.getNodeCosts(ynItr);
- for (unsigned i = 0; i < yCosts.getLength(); ++i) {
- PBQPNum min = eCosts[i][0] + xCosts[0];
- for (unsigned j = 1; j < xCosts.getLength(); ++j) {
- PBQPNum c = eCosts[i][j] + xCosts[j];
- if (c < min)
- min = c;
- }
- yCosts[i] += min;
- }
- h.handleRemoveEdge(eItr, ynItr);
- }
- removeSolverEdge(eItr);
- assert(nd.getSolverDegree() == 0 &&
- "Degree 1 with edge removed should be 0.");
- pushToStack(xnItr);
- s.recordR1();
- }
-
- /// \brief Apply rule R2.
- /// @param xnItr Node iterator for node to apply R2 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR2(Graph::NodeItr xnItr) {
- assert(getSolverNodeData(xnItr).getSolverDegree() == 2 &&
- "R2 applied to node with degree != 2.");
-
- NodeData &nd = getSolverNodeData(xnItr);
- const Vector &xCosts = g.getNodeCosts(xnItr);
-
- SolverEdgeItr aeItr = nd.solverEdgesBegin();
- Graph::EdgeItr yxeItr = *aeItr,
- zxeItr = *(++aeItr);
-
- Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr),
- znItr = g.getEdgeOtherNode(zxeItr, xnItr);
-
- bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr),
- flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr);
-
- const Matrix *yxeCosts = flipEdge1 ?
- new Matrix(g.getEdgeCosts(yxeItr).transpose()) :
- &g.getEdgeCosts(yxeItr);
-
- const Matrix *zxeCosts = flipEdge2 ?
- new Matrix(g.getEdgeCosts(zxeItr).transpose()) :
- &g.getEdgeCosts(zxeItr);
-
- unsigned xLen = xCosts.getLength(),
- yLen = yxeCosts->getRows(),
- zLen = zxeCosts->getRows();
-
- Matrix delta(yLen, zLen);
-
- for (unsigned i = 0; i < yLen; ++i) {
- for (unsigned j = 0; j < zLen; ++j) {
- PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0];
- for (unsigned k = 1; k < xLen; ++k) {
- PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k];
- if (c < min) {
- min = c;
- }
- }
- delta[i][j] = min;
- }
- }
-
- if (flipEdge1)
- delete yxeCosts;
-
- if (flipEdge2)
- delete zxeCosts;
-
- Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr);
- bool addedEdge = false;
-
- if (yzeItr == g.edgesEnd()) {
- yzeItr = g.addEdge(ynItr, znItr, delta);
- addedEdge = true;
- } else {
- Matrix &yzeCosts = g.getEdgeCosts(yzeItr);
- h.preUpdateEdgeCosts(yzeItr);
- if (ynItr == g.getEdgeNode1(yzeItr)) {
- yzeCosts += delta;
- } else {
- yzeCosts += delta.transpose();
- }
- }
-
- bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr);
-
- if (!addedEdge) {
- // If we modified the edge costs let the heuristic know.
- h.postUpdateEdgeCosts(yzeItr);
- }
-
- if (nullCostEdge) {
- // If this edge ended up null remove it.
- if (!addedEdge) {
- // We didn't just add it, so we need to notify the heuristic
- // and remove it from the solver.
- h.handleRemoveEdge(yzeItr, ynItr);
- h.handleRemoveEdge(yzeItr, znItr);
- removeSolverEdge(yzeItr);
- }
- g.removeEdge(yzeItr);
- } else if (addedEdge) {
- // If the edge was added, and non-null, finish setting it up, add it to
- // the solver & notify heuristic.
- edgeDataList.push_back(EdgeData());
- g.setEdgeData(yzeItr, &edgeDataList.back());
- addSolverEdge(yzeItr);
- h.handleAddEdge(yzeItr);
- }
-
- h.handleRemoveEdge(yxeItr, ynItr);
- removeSolverEdge(yxeItr);
- h.handleRemoveEdge(zxeItr, znItr);
- removeSolverEdge(zxeItr);
-
- pushToStack(xnItr);
- s.recordR2();
- }
-
- /// \brief Record an application of the RN rule.
- ///
- /// For use by the HeuristicBase.
- void recordRN() { s.recordRN(); }
-
- private:
-
- NodeData& getSolverNodeData(Graph::NodeItr nItr) {
- return *static_cast<NodeData*>(g.getNodeData(nItr));
- }
-
- EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
- return *static_cast<EdgeData*>(g.getEdgeData(eItr));
- }
-
- void addSolverEdge(Graph::EdgeItr eItr) {
- EdgeData &eData = getSolverEdgeData(eItr);
- NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
- &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
- eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
- eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
- }
-
- void setup() {
- if (h.solverRunSimplify()) {
- simplify();
- }
-
- // Create node data objects.
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
- nodeDataList.push_back(NodeData());
- g.setNodeData(nItr, &nodeDataList.back());
- }
-
- // Create edge data objects.
- for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
- eItr != eEnd; ++eItr) {
- edgeDataList.push_back(EdgeData());
- g.setEdgeData(eItr, &edgeDataList.back());
- addSolverEdge(eItr);
- }
- }
-
- void simplify() {
- disconnectTrivialNodes();
- eliminateIndependentEdges();
- }
-
- // Eliminate trivial nodes.
- void disconnectTrivialNodes() {
- unsigned numDisconnected = 0;
-
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
-
- if (g.getNodeCosts(nItr).getLength() == 1) {
-
- std::vector<Graph::EdgeItr> edgesToRemove;
-
- for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
- aeEnd = g.adjEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
-
- Graph::EdgeItr eItr = *aeItr;
-
- if (g.getEdgeNode1(eItr) == nItr) {
- Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
- g.getNodeCosts(otherNodeItr) +=
- g.getEdgeCosts(eItr).getRowAsVector(0);
- }
- else {
- Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
- g.getNodeCosts(otherNodeItr) +=
- g.getEdgeCosts(eItr).getColAsVector(0);
- }
-
- edgesToRemove.push_back(eItr);
- }
-
- if (!edgesToRemove.empty())
- ++numDisconnected;
-
- while (!edgesToRemove.empty()) {
- g.removeEdge(edgesToRemove.back());
- edgesToRemove.pop_back();
- }
- }
- }
- }
-
- void eliminateIndependentEdges() {
- std::vector<Graph::EdgeItr> edgesToProcess;
- unsigned numEliminated = 0;
-
- for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
- eItr != eEnd; ++eItr) {
- edgesToProcess.push_back(eItr);
- }
-
- while (!edgesToProcess.empty()) {
- if (tryToEliminateEdge(edgesToProcess.back()))
- ++numEliminated;
- edgesToProcess.pop_back();
- }
- }
-
- bool tryToEliminateEdge(Graph::EdgeItr eItr) {
- if (tryNormaliseEdgeMatrix(eItr)) {
- g.removeEdge(eItr);
- return true;
- }
- return false;
- }
-
- bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
-
- const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
-
- Matrix &edgeCosts = g.getEdgeCosts(eItr);
- Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
- &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
-
- for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
- PBQPNum rowMin = infinity;
-
- for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
- if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
- rowMin = edgeCosts[r][c];
- }
-
- uCosts[r] += rowMin;
-
- if (rowMin != infinity) {
- edgeCosts.subFromRow(r, rowMin);
- }
- else {
- edgeCosts.setRow(r, 0);
- }
- }
-
- for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
- PBQPNum colMin = infinity;
-
- for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
- if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
- colMin = edgeCosts[r][c];
- }
-
- vCosts[c] += colMin;
-
- if (colMin != infinity) {
- edgeCosts.subFromCol(c, colMin);
- }
- else {
- edgeCosts.setCol(c, 0);
- }
- }
-
- return edgeCosts.isZero();
- }
-
- void backpropagate() {
- while (!stack.empty()) {
- computeSolution(stack.back());
- stack.pop_back();
- }
- }
-
- void computeSolution(Graph::NodeItr nItr) {
-
- NodeData &nodeData = getSolverNodeData(nItr);
-
- Vector v(g.getNodeCosts(nItr));
-
- // Solve based on existing solved edges.
- for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
- solvedEdgeEnd = nodeData.solverEdgesEnd();
- solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
-
- Graph::EdgeItr eItr(*solvedEdgeItr);
- Matrix &edgeCosts = g.getEdgeCosts(eItr);
-
- if (nItr == g.getEdgeNode1(eItr)) {
- Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
- unsigned adjSolution = s.getSelection(adjNode);
- v += edgeCosts.getColAsVector(adjSolution);
- }
- else {
- Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
- unsigned adjSolution = s.getSelection(adjNode);
- v += edgeCosts.getRowAsVector(adjSolution);
- }
-
- }
-
- setSolution(nItr, v.minIndex());
- }
-
- void cleanup() {
- h.cleanup();
- nodeDataList.clear();
- edgeDataList.clear();
- }
- };
-
- /// \brief PBQP heuristic solver class.
- ///
- /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
- /// by calling
- /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
- ///
- /// The choice of heuristic for the H parameter will affect both the solver
- /// speed and solution quality. The heuristic should be chosen based on the
- /// nature of the problem being solved.
- /// Currently the only solver included with LLVM is the Briggs heuristic for
- /// register allocation.
- template <typename HImpl>
- class HeuristicSolver {
- public:
- static Solution solve(Graph &g) {
- HeuristicSolverImpl<HImpl> hs(g);
- return hs.computeSolution();
- }
- };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
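
The core of the deleted solver's applyR1 is a single fold: for a degree-1 node x attached to neighbour y by cost matrix E, every option j of y absorbs the cheapest combined cost over x's options, after which x and the edge can be removed. A standalone sketch of that arithmetic using plain std::vector containers (hypothetical names, not the PBQP Vector/Matrix types):

// Sketch only: yCosts[j] += min over i of (E[i][j] + xCosts[i]).
#include <algorithm>
#include <cassert>
#include <vector>

typedef std::vector<double> Vec;
typedef std::vector<std::vector<double> > Mat;   // E[i][j]: cost of picking (x=i, y=j)

static void applyR1Sketch(const Vec &xCosts, const Mat &E, Vec &yCosts) {
  for (unsigned j = 0; j < yCosts.size(); ++j) {
    double Min = E[0][j] + xCosts[0];
    for (unsigned i = 1; i < xCosts.size(); ++i)
      Min = std::min(Min, E[i][j] + xCosts[i]);
    yCosts[j] += Min;   // x and its only edge can now be dropped from the graph
  }
}

int main() {
  Vec xCosts, yCosts;
  xCosts.push_back(1.0); xCosts.push_back(5.0);
  yCosts.push_back(0.0); yCosts.push_back(0.0);
  Mat E(2, Vec(2, 0.0));
  E[0][0] = 3.0; E[0][1] = 0.0;
  E[1][0] = 0.0; E[1][1] = 2.0;
  applyR1Sketch(xCosts, E, yCosts);
  assert(yCosts[0] == 4.0 && yCosts[1] == 1.0);   // min(3+1,0+5)=4, min(0+1,2+5)=1
  return 0;
}

applyR2 in the deleted file performs the analogous fold for degree-2 nodes, producing a delta matrix that is added onto the edge between the two remaining neighbours.
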
diff --git a/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h b/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h
deleted file mode 100644
index 18eaf7c..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ /dev/null
@@ -1,460 +0,0 @@
-//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class implements the Briggs test for "allocability" of nodes in a
-// PBQP graph representing a register allocation problem. Nodes which can be
-// proven allocable (by a safe and relatively accurate test) are removed from
-// the PBQP graph first. If no provably allocable node is present in the graph
-// then the node with the minimal spill-cost to degree ratio is removed.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-
-#include "../HeuristicSolver.h"
-#include "../HeuristicBase.h"
-
-#include <set>
-#include <limits>
-
-namespace PBQP {
- namespace Heuristics {
-
- /// \brief PBQP Heuristic which applies an allocability test based on
- /// Briggs.
- ///
- /// This heuristic assumes that the elements of cost vectors in the PBQP
- /// problem represent storage options, with the first being the spill
- /// option and subsequent elements representing legal registers for the
- /// corresponding node. Edge cost matrices are likewise assumed to represent
- /// register constraints.
- /// If one or more nodes can be proven allocable by this heuristic (by
- /// inspection of their constraint matrices) then the allocable node of
- /// highest degree is selected for the next reduction and pushed to the
- /// solver stack. If no nodes can be proven allocable then the node with
- /// the lowest estimated spill cost is selected and push to the solver stack
- /// instead.
- ///
- /// This implementation is built on top of HeuristicBase.
- class Briggs : public HeuristicBase<Briggs> {
- private:
-
- class LinkDegreeComparator {
- public:
- LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
- bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
- if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
- return true;
- return false;
- }
- private:
- HeuristicSolverImpl<Briggs> *s;
- };
-
- class SpillCostComparator {
- public:
- SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
- : s(&s), g(&s.getGraph()) {}
- bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
- PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr),
- cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
- if (cost1 < cost2)
- return true;
- return false;
- }
-
- private:
- HeuristicSolverImpl<Briggs> *s;
- Graph *g;
- };
-
- typedef std::list<Graph::NodeItr> RNAllocableList;
- typedef RNAllocableList::iterator RNAllocableListItr;
-
- typedef std::list<Graph::NodeItr> RNUnallocableList;
- typedef RNUnallocableList::iterator RNUnallocableListItr;
-
- public:
-
- struct NodeData {
- typedef std::vector<unsigned> UnsafeDegreesArray;
- bool isHeuristic, isAllocable, isInitialized;
- unsigned numDenied, numSafe;
- UnsafeDegreesArray unsafeDegrees;
- RNAllocableListItr rnaItr;
- RNUnallocableListItr rnuItr;
-
- NodeData()
- : isHeuristic(false), isAllocable(false), isInitialized(false),
- numDenied(0), numSafe(0) { }
- };
-
- struct EdgeData {
- typedef std::vector<unsigned> UnsafeArray;
- unsigned worst, reverseWorst;
- UnsafeArray unsafe, reverseUnsafe;
- bool isUpToDate;
-
- EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
- };
-
- /// \brief Construct an instance of the Briggs heuristic.
- /// @param solver A reference to the solver which is using this heuristic.
- Briggs(HeuristicSolverImpl<Briggs> &solver) :
- HeuristicBase<Briggs>(solver) {}
-
- /// \brief Determine whether a node should be reduced using optimal
- /// reduction.
- /// @param nItr Node iterator to be considered.
- /// @return True if the given node should be optimally reduced, false
- /// otherwise.
- ///
- /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
- /// exception. Nodes whose spill cost (element 0 of their cost vector) is
- /// infinite are checked for allocability first. Allocable nodes may be
- /// optimally reduced, but nodes whose allocability cannot be proven are
- /// selected for heuristic reduction instead.
- bool shouldOptimallyReduce(Graph::NodeItr nItr) {
- if (getSolver().getSolverDegree(nItr) < 3) {
- return true;
- }
- // else
- return false;
- }
-
- /// \brief Add a node to the heuristic reduce list.
- /// @param nItr Node iterator to add to the heuristic reduce list.
- void addToHeuristicReduceList(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
- initializeNode(nItr);
- nd.isHeuristic = true;
- if (nd.isAllocable) {
- nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
- } else {
- nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr);
- }
- }
-
- /// \brief Heuristically reduce one of the nodes in the heuristic
- /// reduce list.
- /// @return True if a reduction takes place, false if the heuristic reduce
- /// list is empty.
- ///
- /// If the list of allocable nodes is non-empty a node is selected
- /// from it and pushed to the stack. Otherwise if the non-allocable list
- /// is non-empty a node is selected from it and pushed to the stack.
- /// If both lists are empty the method simply returns false with no action
- /// taken.
- bool heuristicReduce() {
- if (!rnAllocableList.empty()) {
- RNAllocableListItr rnaItr =
- min_element(rnAllocableList.begin(), rnAllocableList.end(),
- LinkDegreeComparator(getSolver()));
- Graph::NodeItr nItr = *rnaItr;
- rnAllocableList.erase(rnaItr);
- handleRemoveNode(nItr);
- getSolver().pushToStack(nItr);
- return true;
- } else if (!rnUnallocableList.empty()) {
- RNUnallocableListItr rnuItr =
- min_element(rnUnallocableList.begin(), rnUnallocableList.end(),
- SpillCostComparator(getSolver()));
- Graph::NodeItr nItr = *rnuItr;
- rnUnallocableList.erase(rnuItr);
- handleRemoveNode(nItr);
- getSolver().pushToStack(nItr);
- return true;
- }
- // else
- return false;
- }
-
- /// \brief Prepare a change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
- Graph &g = getGraph();
- Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
- n2Itr = g.getEdgeNode2(eItr);
- NodeData &n1 = getHeuristicNodeData(n1Itr),
- &n2 = getHeuristicNodeData(n2Itr);
-
- if (n1.isHeuristic)
- subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr));
- if (n2.isHeuristic)
- subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr));
-
- EdgeData &ed = getHeuristicEdgeData(eItr);
- ed.isUpToDate = false;
- }
-
- /// \brief Handle the change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
- // This is effectively the same as adding a new edge now, since
- // we've factored out the costs of the old one.
- handleAddEdge(eItr);
- }
-
- /// \brief Handle the addition of a new edge into the PBQP graph.
- /// @param eItr Edge iterator for the added edge.
- ///
- /// Updates allocability of any nodes connected by this edge which are
- /// being managed by the heuristic. If allocability changes they are
- /// moved to the appropriate list.
- void handleAddEdge(Graph::EdgeItr eItr) {
- Graph &g = getGraph();
- Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
- n2Itr = g.getEdgeNode2(eItr);
- NodeData &n1 = getHeuristicNodeData(n1Itr),
- &n2 = getHeuristicNodeData(n2Itr);
-
- // If neither node is managed by the heuristic there's nothing to be
- // done.
- if (!n1.isHeuristic && !n2.isHeuristic)
- return;
-
- // Ok - we need to update at least one node.
- computeEdgeContributions(eItr);
-
- // Update node 1 if it's managed by the heuristic.
- if (n1.isHeuristic) {
- bool n1WasAllocable = n1.isAllocable;
- addEdgeContributions(eItr, n1Itr);
- updateAllocability(n1Itr);
- if (n1WasAllocable && !n1.isAllocable) {
- rnAllocableList.erase(n1.rnaItr);
- n1.rnuItr =
- rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
- }
- }
-
- // Likewise for node 2.
- if (n2.isHeuristic) {
- bool n2WasAllocable = n2.isAllocable;
- addEdgeContributions(eItr, n2Itr);
- updateAllocability(n2Itr);
- if (n2WasAllocable && !n2.isAllocable) {
- rnAllocableList.erase(n2.rnaItr);
- n2.rnuItr =
- rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
- }
- }
- }
-
- /// \brief Handle disconnection of an edge from a node.
- /// @param eItr Edge iterator for edge being disconnected.
- /// @param nItr Node iterator for the node being disconnected from.
- ///
- /// Updates allocability of the given node and, if appropriate, moves the
- /// node to a new list.
- void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
-
- // If the node is not managed by the heuristic there's nothing to be
- // done.
- if (!nd.isHeuristic)
- return;
-
- EdgeData &ed = getHeuristicEdgeData(eItr);
- (void)ed;
- assert(ed.isUpToDate && "Edge data is not up to date.");
-
- // Update node.
- bool ndWasAllocable = nd.isAllocable;
- subtractEdgeContributions(eItr, nItr);
- updateAllocability(nItr);
-
- // If the node has gone optimal...
- if (shouldOptimallyReduce(nItr)) {
- nd.isHeuristic = false;
- addToOptimalReduceList(nItr);
- if (ndWasAllocable) {
- rnAllocableList.erase(nd.rnaItr);
- } else {
- rnUnallocableList.erase(nd.rnuItr);
- }
- } else {
- // Node didn't go optimal, but we might have to move it
- // from "unallocable" to "allocable".
- if (!ndWasAllocable && nd.isAllocable) {
- rnUnallocableList.erase(nd.rnuItr);
- nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
- }
- }
- }
-
- private:
-
- NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
- return getSolver().getHeuristicNodeData(nItr);
- }
-
- EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
- return getSolver().getHeuristicEdgeData(eItr);
- }
-
- // Work out what this edge will contribute to the allocability of the
- // nodes connected to it.
- void computeEdgeContributions(Graph::EdgeItr eItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- if (ed.isUpToDate)
- return; // Edge data is already up to date.
-
- Matrix &eCosts = getGraph().getEdgeCosts(eItr);
-
- unsigned numRegs = eCosts.getRows() - 1,
- numReverseRegs = eCosts.getCols() - 1;
-
- std::vector<unsigned> rowInfCounts(numRegs, 0),
- colInfCounts(numReverseRegs, 0);
-
- ed.worst = 0;
- ed.reverseWorst = 0;
- ed.unsafe.clear();
- ed.unsafe.resize(numRegs, 0);
- ed.reverseUnsafe.clear();
- ed.reverseUnsafe.resize(numReverseRegs, 0);
-
- for (unsigned i = 0; i < numRegs; ++i) {
- for (unsigned j = 0; j < numReverseRegs; ++j) {
- if (eCosts[i + 1][j + 1] ==
- std::numeric_limits<PBQPNum>::infinity()) {
- ed.unsafe[i] = 1;
- ed.reverseUnsafe[j] = 1;
- ++rowInfCounts[i];
- ++colInfCounts[j];
-
- if (colInfCounts[j] > ed.worst) {
- ed.worst = colInfCounts[j];
- }
-
- if (rowInfCounts[i] > ed.reverseWorst) {
- ed.reverseWorst = rowInfCounts[i];
- }
- }
- }
- }
-
- ed.isUpToDate = true;
- }
-
- // Add the contributions of the given edge to the given node's
- // numDenied and safe members. No action is taken other than to update
- // these member values. Once updated these numbers can be used by clients
- // to update the node's allocability.
- void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
- EdgeData::UnsafeArray &unsafe =
- nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
- nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst;
-
- for (unsigned r = 0; r < numRegs; ++r) {
- if (unsafe[r]) {
- if (nd.unsafeDegrees[r]==0) {
- --nd.numSafe;
- }
- ++nd.unsafeDegrees[r];
- }
- }
- }
-
- // Subtract the contributions of the given edge to the given node's
- // numDenied and safe members. No action is taken other than to update
- // these member values. Once updated these numbers can be used by clients
- // to update the node's allocability.
- void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
- EdgeData::UnsafeArray &unsafe =
- nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
- nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst;
-
- for (unsigned r = 0; r < numRegs; ++r) {
- if (unsafe[r]) {
- if (nd.unsafeDegrees[r] == 1) {
- ++nd.numSafe;
- }
- --nd.unsafeDegrees[r];
- }
- }
- }
-
- void updateAllocability(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
- nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0;
- }
-
- void initializeNode(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
-
- if (nd.isInitialized)
- return; // Node data is already up to date.
-
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- nd.numDenied = 0;
- nd.numSafe = numRegs;
- nd.unsafeDegrees.resize(numRegs, 0);
-
- typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
-
- for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
- aeEnd = getSolver().solverEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
-
- Graph::EdgeItr eItr = *aeItr;
- computeEdgeContributions(eItr);
- addEdgeContributions(eItr, nItr);
- }
-
- updateAllocability(nItr);
- nd.isInitialized = true;
- }
-
- void handleRemoveNode(Graph::NodeItr xnItr) {
- typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
- std::vector<Graph::EdgeItr> edgesToRemove;
- for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
- aeEnd = getSolver().solverEdgesEnd(xnItr);
- aeItr != aeEnd; ++aeItr) {
- Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
- handleRemoveEdge(*aeItr, ynItr);
- edgesToRemove.push_back(*aeItr);
- }
- while (!edgesToRemove.empty()) {
- getSolver().removeSolverEdge(edgesToRemove.back());
- edgesToRemove.pop_back();
- }
- }
-
- RNAllocableList rnAllocableList;
- RNUnallocableList rnUnallocableList;
- };
-
- }
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
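
The deleted heuristic above derives allocability from two per-node counters (numDenied, numSafe) plus a per-register count of unsafe edges. The following standalone sketch restates that bookkeeping outside the PBQP solver; the BriggsNode struct and its method names are illustrative, not part of the original API.

#include <cassert>
#include <vector>

// Illustrative stand-in for the per-node state kept by the Briggs heuristic.
struct BriggsNode {
  unsigned numRegs;                    // allocation options for this node
  unsigned numDenied;                  // worst-case options denied by edges
  unsigned numSafe;                    // options with no unsafe edge at all
  std::vector<unsigned> unsafeDegrees; // per-option count of unsafe edges

  explicit BriggsNode(unsigned regs)
      : numRegs(regs), numDenied(0), numSafe(regs), unsafeDegrees(regs, 0) {}

  // Mirrors addEdgeContributions: the edge denies up to `worst` options
  // outright and marks individual options unsafe.
  void addEdge(unsigned worst, const std::vector<unsigned> &unsafe) {
    assert(unsafe.size() == unsafeDegrees.size());
    numDenied += worst;
    for (unsigned r = 0; r < numRegs; ++r)
      if (unsafe[r] && unsafeDegrees[r]++ == 0)
        --numSafe;
  }

  // Mirrors updateAllocability: colourable if the edges cannot deny every
  // option, or some option is never made unsafe by any edge.
  bool isAllocable() const { return numDenied < numRegs || numSafe > 0; }
};

subtractEdgeContributions is the exact inverse of addEdge here: it restores numSafe whenever a register's unsafe degree drops back to zero.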
diff --git a/contrib/llvm/lib/CodeGen/PBQP/Math.h b/contrib/llvm/lib/CodeGen/PBQP/Math.h
deleted file mode 100644
index e7598bf..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/Math.h
+++ /dev/null
@@ -1,288 +0,0 @@
-//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_MATH_H
-#define LLVM_CODEGEN_PBQP_MATH_H
-
-#include <cassert>
-#include <algorithm>
-#include <functional>
-
-namespace PBQP {
-
-typedef float PBQPNum;
-
-/// \brief PBQP Vector class.
-class Vector {
- public:
-
- /// \brief Construct a PBQP vector of the given size.
- explicit Vector(unsigned length) :
- length(length), data(new PBQPNum[length]) {
- }
-
- /// \brief Construct a PBQP vector with initializer.
- Vector(unsigned length, PBQPNum initVal) :
- length(length), data(new PBQPNum[length]) {
- std::fill(data, data + length, initVal);
- }
-
- /// \brief Copy construct a PBQP vector.
- Vector(const Vector &v) :
- length(v.length), data(new PBQPNum[length]) {
- std::copy(v.data, v.data + length, data);
- }
-
- /// \brief Destroy this vector, return its memory.
- ~Vector() { delete[] data; }
-
- /// \brief Assignment operator.
- Vector& operator=(const Vector &v) {
- delete[] data;
- length = v.length;
- data = new PBQPNum[length];
- std::copy(v.data, v.data + length, data);
- return *this;
- }
-
- /// \brief Return the length of the vector
- unsigned getLength() const {
- return length;
- }
-
- /// \brief Element access.
- PBQPNum& operator[](unsigned index) {
- assert(index < length && "Vector element access out of bounds.");
- return data[index];
- }
-
- /// \brief Const element access.
- const PBQPNum& operator[](unsigned index) const {
- assert(index < length && "Vector element access out of bounds.");
- return data[index];
- }
-
- /// \brief Add another vector to this one.
- Vector& operator+=(const Vector &v) {
- assert(length == v.length && "Vector length mismatch.");
- std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
- return *this;
- }
-
- /// \brief Subtract another vector from this one.
- Vector& operator-=(const Vector &v) {
- assert(length == v.length && "Vector length mismatch.");
- std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
- return *this;
- }
-
- /// \brief Returns the index of the minimum value in this vector
- unsigned minIndex() const {
- return std::min_element(data, data + length) - data;
- }
-
- private:
- unsigned length;
- PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given vector on the given
-/// output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Vector &v) {
- assert((v.getLength() != 0) && "Zero-length vector badness.");
-
- os << "[ " << v[0];
- for (unsigned i = 1; i < v.getLength(); ++i) {
- os << ", " << v[i];
- }
- os << " ]";
-
- return os;
-}
-
-
-/// \brief PBQP Matrix class
-class Matrix {
- public:
-
- /// \brief Construct a PBQP Matrix with the given dimensions.
- Matrix(unsigned rows, unsigned cols) :
- rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
- }
-
- /// \brief Construct a PBQP Matrix with the given dimensions and initial
- /// value.
- Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
- rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
- std::fill(data, data + (rows * cols), initVal);
- }
-
- /// \brief Copy construct a PBQP matrix.
- Matrix(const Matrix &m) :
- rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
- std::copy(m.data, m.data + (rows * cols), data);
- }
-
- /// \brief Destroy this matrix, return its memory.
- ~Matrix() { delete[] data; }
-
- /// \brief Assignment operator.
- Matrix& operator=(const Matrix &m) {
- delete[] data;
- rows = m.rows; cols = m.cols;
- data = new PBQPNum[rows * cols];
- std::copy(m.data, m.data + (rows * cols), data);
- return *this;
- }
-
- /// \brief Return the number of rows in this matrix.
- unsigned getRows() const { return rows; }
-
- /// \brief Return the number of cols in this matrix.
- unsigned getCols() const { return cols; }
-
- /// \brief Matrix element access.
- PBQPNum* operator[](unsigned r) {
- assert(r < rows && "Row out of bounds.");
- return data + (r * cols);
- }
-
- /// \brief Matrix element access.
- const PBQPNum* operator[](unsigned r) const {
- assert(r < rows && "Row out of bounds.");
- return data + (r * cols);
- }
-
- /// \brief Returns the given row as a vector.
- Vector getRowAsVector(unsigned r) const {
- Vector v(cols);
- for (unsigned c = 0; c < cols; ++c)
- v[c] = (*this)[r][c];
- return v;
- }
-
- /// \brief Returns the given column as a vector.
- Vector getColAsVector(unsigned c) const {
- Vector v(rows);
- for (unsigned r = 0; r < rows; ++r)
- v[r] = (*this)[r][c];
- return v;
- }
-
- /// \brief Reset the matrix to the given value.
- Matrix& reset(PBQPNum val = 0) {
- std::fill(data, data + (rows * cols), val);
- return *this;
- }
-
- /// \brief Set a single row of this matrix to the given value.
- Matrix& setRow(unsigned r, PBQPNum val) {
- assert(r < rows && "Row out of bounds.");
- std::fill(data + (r * cols), data + ((r + 1) * cols), val);
- return *this;
- }
-
- /// \brief Set a single column of this matrix to the given value.
- Matrix& setCol(unsigned c, PBQPNum val) {
- assert(c < cols && "Column out of bounds.");
- for (unsigned r = 0; r < rows; ++r)
- (*this)[r][c] = val;
- return *this;
- }
-
- /// \brief Matrix transpose.
- Matrix transpose() const {
- Matrix m(cols, rows);
- for (unsigned r = 0; r < rows; ++r)
- for (unsigned c = 0; c < cols; ++c)
- m[c][r] = (*this)[r][c];
- return m;
- }
-
- /// \brief Returns the diagonal of the matrix as a vector.
- ///
- /// Matrix must be square.
- Vector diagonalize() const {
- assert(rows == cols && "Attempt to diagonalize non-square matrix.");
-
- Vector v(rows);
- for (unsigned r = 0; r < rows; ++r)
- v[r] = (*this)[r][r];
- return v;
- }
-
- /// \brief Add the given matrix to this one.
- Matrix& operator+=(const Matrix &m) {
- assert(rows == m.rows && cols == m.cols &&
- "Matrix dimensions mismatch.");
- std::transform(data, data + (rows * cols), m.data, data,
- std::plus<PBQPNum>());
- return *this;
- }
-
- /// \brief Returns the minimum of the given row
- PBQPNum getRowMin(unsigned r) const {
- assert(r < rows && "Row out of bounds");
- return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
- }
-
- /// \brief Returns the minimum of the given column
- PBQPNum getColMin(unsigned c) const {
- PBQPNum minElem = (*this)[0][c];
- for (unsigned r = 1; r < rows; ++r)
- if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
- return minElem;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given row.
- Matrix& subFromRow(unsigned r, PBQPNum val) {
- assert(r < rows && "Row out of bounds");
- std::transform(data + (r * cols), data + ((r + 1) * cols),
- data + (r * cols),
- std::bind2nd(std::minus<PBQPNum>(), val));
- return *this;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given column.
- Matrix& subFromCol(unsigned c, PBQPNum val) {
- for (unsigned r = 0; r < rows; ++r)
- (*this)[r][c] -= val;
- return *this;
- }
-
- /// \brief Returns true if this is a zero matrix.
- bool isZero() const {
- return find_if(data, data + (rows * cols),
- std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
- data + (rows * cols);
- }
-
- private:
- unsigned rows, cols;
- PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given matrix on the given
-/// output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Matrix &m) {
-
- assert((m.getRows() != 0) && "Zero-row matrix badness.");
-
- for (unsigned i = 0; i < m.getRows(); ++i) {
- os << m.getRowAsVector(i);
- }
-
- return os;
-}
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_MATH_H
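
For reference, a small usage sketch of the Vector and Matrix classes being removed; it assumes the old Math.h is still reachable on the include path and exercises only the members shown above.

#include "Math.h"
#include <iostream>

int main() {
  PBQP::Vector costs(3, 0.0);    // three options, all initially free
  costs[2] = 5.0;
  std::cout << "cheapest option: " << costs.minIndex() << "\n";

  PBQP::Matrix edge(3, 3, 1.0);  // pairwise costs between two 3-option nodes
  edge.setRow(0, 0.0);
  PBQP::Matrix sum = edge;
  sum += edge.transpose();       // dimensions must match (asserted above)
  std::cout << "row 1 minimum: " << sum.getRowMin(1) << "\n";
  return 0;
}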
diff --git a/contrib/llvm/lib/CodeGen/PBQP/Solution.h b/contrib/llvm/lib/CodeGen/PBQP/Solution.h
deleted file mode 100644
index 047fd04..0000000
--- a/contrib/llvm/lib/CodeGen/PBQP/Solution.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Solution class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
-#define LLVM_CODEGEN_PBQP_SOLUTION_H
-
-#include "Math.h"
-#include "Graph.h"
-
-#include <map>
-
-namespace PBQP {
-
- /// \brief Represents a solution to a PBQP problem.
- ///
- /// To get the selection for each node in the problem use the getSelection method.
- class Solution {
- private:
-
- typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
- SelectionsMap selections;
-
- unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
-
- public:
-
- /// \brief Number of nodes for which selections have been made.
- /// @return Number of nodes for which selections have been made.
- unsigned numNodes() const { return selections.size(); }
-
- /// \brief Records a reduction via the R0 rule. Should be called from the
- /// solver only.
- void recordR0() { ++r0Reductions; }
-
- /// \brief Returns the number of R0 reductions applied to solve the problem.
- unsigned numR0Reductions() const { return r0Reductions; }
-
- /// \brief Records a reduction via the R1 rule. Should be called from the
- /// solver only.
- void recordR1() { ++r1Reductions; }
-
- /// \brief Returns the number of R1 reductions applied to solve the problem.
- unsigned numR1Reductions() const { return r1Reductions; }
-
- /// \brief Records a reduction via the R2 rule. Should be called from the
- /// solver only.
- void recordR2() { ++r2Reductions; }
-
- /// \brief Returns the number of R2 reductions applied to solve the problem.
- unsigned numR2Reductions() const { return r2Reductions; }
-
- /// \brief Records a reduction via the RN rule. Should be called from the
- /// solver only.
- void recordRN() { ++ rNReductions; }
-
- /// \brief Returns the number of RN reductions applied to solve the problem.
- unsigned numRNReductions() const { return rNReductions; }
-
- /// \brief Set the selection for a given node.
- /// @param nItr Node iterator.
- /// @param selection Selection for nItr.
- void setSelection(Graph::NodeItr nItr, unsigned selection) {
- selections[nItr] = selection;
- }
-
- /// \brief Get a node's selection.
- /// @param nItr Node iterator.
- /// @return The selection for nItr;
- unsigned getSelection(Graph::NodeItr nItr) const {
- SelectionsMap::const_iterator sItr = selections.find(nItr);
- assert(sItr != selections.end() && "No selection for node.");
- return sItr->second;
- }
-
- };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
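
A hedged sketch of reading selections back out of a Solution. Keeping the node iterators used to build the problem in a caller-side vector is an assumption of this example; the Solution class itself only stores the NodeItr-to-selection map.

#include "Solution.h"
#include <vector>

// Map each node of the solved problem to the option the solver selected for
// it. getSelection asserts if no selection was recorded for a node.
std::vector<unsigned>
collectSelections(const PBQP::Solution &solution,
                  const std::vector<PBQP::Graph::NodeItr> &nodeItrs) {
  std::vector<unsigned> picks;
  picks.reserve(nodeItrs.size());
  for (std::vector<PBQP::Graph::NodeItr>::const_iterator
         I = nodeItrs.begin(), E = nodeItrs.end(); I != E; ++I)
    picks.push_back(solution.getSelection(*I));
  return picks;
}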
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index d4df4c5..5f7cf58 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
-#include "PHIElimination.h"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -34,23 +34,72 @@
#include <map>
using namespace llvm;
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV, MachineLoopInfo *MLI);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
- "Eliminate PHI nodes for register allocation", false, false);
+ "Eliminate PHI nodes for register allocation", false, false)
-char &llvm::PHIEliminationID = PHIElimination::ID;
+char& llvm::PHIEliminationID = PHIElimination::ID;
-void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
bool Changed = false;
@@ -93,14 +142,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
-bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
// Get an iterator to the first instruction after the last PHI node (this may
// also be the end of the basic block).
- MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
while (MBB.front().isPHI())
LowerAtomicPHINode(MBB, AfterPHIsIt);
@@ -121,58 +170,14 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
-// when following the CFG edge to SuccMBB. This needs to be after any def of
-// SrcReg, but before any subsequent point where control flow might jump out of
-// the basic block.
-MachineBasicBlock::iterator
-llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
- MachineBasicBlock &SuccMBB,
- unsigned SrcReg) {
- // Handle the trivial case trivially.
- if (MBB.empty())
- return MBB.begin();
-
- // Usually, we just want to insert the copy before the first terminator
- // instruction. However, for the edge going to a landing pad, we must insert
- // the copy before the call/invoke instruction.
- if (!SuccMBB.isLandingPad())
- return MBB.getFirstTerminator();
-
- // Discover any defs/uses in this basic block.
- SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *DefUseMI = &*RI;
- if (DefUseMI->getParent() == &MBB)
- DefUsesInMBB.insert(DefUseMI);
- }
- MachineBasicBlock::iterator InsertPoint;
- if (DefUsesInMBB.empty()) {
- // No defs. Insert the copy at the start of the basic block.
- InsertPoint = MBB.begin();
- } else if (DefUsesInMBB.size() == 1) {
- // Insert the copy immediately after the def/use.
- InsertPoint = *DefUsesInMBB.begin();
- ++InsertPoint;
- } else {
- // Insert the copy immediately after the last def/use.
- InsertPoint = MBB.end();
- while (!DefUsesInMBB.count(&*--InsertPoint)) {}
- ++InsertPoint;
- }
-
- // Make sure the copy goes after any phi nodes however.
- return SkipPHIsAndLabels(MBB, InsertPoint);
-}
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
 /// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
///
-void llvm::PHIElimination::LowerAtomicPHINode(
+void PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
++NumAtomic;
@@ -207,7 +212,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
- FindCopyInsertPoint(opBlock, MBB, SrcReg);
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
if (!reusedIncoming && IncomingReg)
@@ -335,6 +340,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = llvm::next(Term);
TI != opBlock.end(); ++TI) {
+ if (TI->isDebugValue())
+ continue;
assert(!TI->readsRegister(SrcReg) &&
"Terminator instructions cannot use virtual registers unless"
"they are the first terminator in a block!");
@@ -343,9 +350,13 @@ void llvm::PHIElimination::LowerAtomicPHINode(
} else if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
KillInst = Term;
- while (KillInst != opBlock.begin())
- if ((--KillInst)->readsRegister(SrcReg))
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
break;
+ }
} else {
// We just inserted this copy.
KillInst = prior(InsertPos);
@@ -371,7 +382,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
///
-void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I)
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
@@ -381,10 +392,10 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
BBI->getOperand(i).getReg())];
}
-bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
- MachineBasicBlock &MBB,
- LiveVariables &LV,
- MachineLoopInfo *MLI) {
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
@@ -403,10 +414,14 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
if (!MLI ||
!(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
- MLI->isLoopHeader(&MBB)))
- Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+ MLI->isLoopHeader(&MBB))) {
+ if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
}
}
}
- return true;
+ return Changed;
}
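
As a summary of the hunk above, the split decision in isolation: split the PreMBB -> MBB edge only when the PHI source is live out of the predecessor, not already live into MBB, and the edge is not a back-edge into a loop header. This is a sketch only; the real code performs the split inline and bumps the new NumCriticalEdgesSplit statistic.

#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

// Decide whether the PreMBB -> MBB edge should be split for PHI source Reg.
static bool shouldSplitPHIEdge(unsigned Reg, MachineBasicBlock *PreMBB,
                               MachineBasicBlock &MBB, LiveVariables &LV,
                               MachineLoopInfo *MLI) {
  // No benefit if the value is already live into MBB or dies in PreMBB.
  if (LV.isLiveIn(Reg, MBB) || !LV.isLiveOut(Reg, *PreMBB))
    return false;
  // Avoid splitting back-edges into loop headers; that tends to hurt.
  if (MLI && MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
      MLI->isLoopHeader(&MBB))
    return false;
  return true;
}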
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.h b/contrib/llvm/lib/CodeGen/PHIElimination.h
deleted file mode 100644
index 45a9718..0000000
--- a/contrib/llvm/lib/CodeGen/PHIElimination.h
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
-#define LLVM_CODEGEN_PHIELIMINATION_HPP
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-namespace llvm {
- class LiveVariables;
- class MachineRegisterInfo;
- class MachineLoopInfo;
-
- /// Lower PHI instructions to copies.
- class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PHIElimination() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- private:
- /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
- /// in predecessor basic blocks.
- ///
- bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
- void LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt);
-
- /// analyzePHINodes - Gather information about the PHI nodes in
- /// here. In particular, we want to map the number of uses of a virtual
- /// register which is used in a PHI node. We map that to the BB the
- /// vreg is coming from. This is used later to determine when the vreg
- /// is killed in the BB.
- ///
- void analyzePHINodes(const MachineFunction& Fn);
-
- /// Split critical edges where necessary for good coalescer performance.
- bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV, MachineLoopInfo *MLI);
-
- /// SplitCriticalEdge - Split a critical edge from A to B by
- /// inserting a new MBB. Update branches in A and PHI instructions
- /// in B. Return the new block.
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
- MachineBasicBlock *B);
-
- /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
- /// any def of SrcReg, but before any subsequent point where control flow
- /// might jump out of the basic block.
- MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
- MachineBasicBlock &SuccMBB,
- unsigned SrcReg);
-
- // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
- // also after any exception handling labels: in landing pads execution
- // starts at the label, so any copies placed before it won't be executed!
- // We also deal with DBG_VALUEs, which are a bit tricky:
- // PHI
- // DBG_VALUE
- // LABEL
- // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it
- // needs to be annulled or, better, moved to follow the label, as well.
- // PHI
- // DBG_VALUE
- // no label
- // Here it is not a good idea to skip the DBG_VALUE.
- // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and
- // maximally stupid.
- MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- // Rather than assuming that EH labels come before other kinds of labels,
- // just skip all labels.
- while (I != MBB.end() &&
- (I->isPHI() || I->isLabel() || I->isDebugValue())) {
- if (I->isDebugValue() && I->getNumOperands()==3 &&
- I->getOperand(0).isReg())
- I->getOperand(0).setReg(0U);
- ++I;
- }
- return I;
- }
-
- typedef std::pair<unsigned, unsigned> BBVRegPair;
- typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
-
- VRegPHIUse VRegPHIUseCount;
-
- // Defs of PHI sources which are implicit_def.
- SmallPtrSet<MachineInstr*, 4> ImpDefs;
-
- // Map reusable lowered PHI node -> incoming join register.
- typedef DenseMap<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> LoweredPHIMap;
- LoweredPHIMap LoweredPHIs;
- };
-
-}
-
-#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 0000000..10bfdcc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+// SrcReg when following the CFG edge to SuccMBB. This needs to be after any
+// def of SrcReg, but before any subsequent point where control flow might
+// jump out of the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 0000000..9ac47fb4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
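
A hedged call-site sketch for the new helper; opBlock, SuccMBB, SrcReg and IncomingReg mirror the variables of LowerAtomicPHINode, and the COPY emission is illustrative rather than a quotation of that function.

#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;

// Copy SrcReg into IncomingReg in opBlock at a point that is safe for the
// edge opBlock -> SuccMBB: after defs of SrcReg, after PHIs and labels, and
// before the first terminator (or before the call when SuccMBB is a landing
// pad).
static void insertPHICopy(MachineBasicBlock &opBlock,
                          MachineBasicBlock &SuccMBB,
                          unsigned SrcReg, unsigned IncomingReg,
                          const TargetInstrInfo *TII) {
  MachineBasicBlock::iterator InsertPos =
      findPHICopyInsertPoint(&opBlock, &SuccMBB, SrcReg);
  BuildMI(opBlock, InsertPos, DebugLoc(),
          TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
}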
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 17cee46..5d7123c 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -41,7 +41,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -50,8 +52,13 @@ static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
cl::desc("Aggressive extension optimization"));
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediates folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -62,7 +69,9 @@ namespace {
public:
static char ID; // Pass identification
- PeepholeOptimizer() : MachineFunctionPass(ID) {}
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -79,12 +88,21 @@ namespace {
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
}
char PeepholeOptimizer::ID = 0;
-INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
- "Peephole Optimizations", false, false);
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
FunctionPass *llvm::createPeepholeOptimizerPass() {
return new PeepholeOptimizer();
@@ -102,12 +120,10 @@ FunctionPass *llvm::createPeepholeOptimizerPass() {
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
- LocalMIs.insert(MI);
-
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -232,22 +248,17 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+ MachineBasicBlock *MBB){
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
unsigned SrcReg;
- int CmpValue;
- if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
- return false;
-
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
+ int CmpMask, CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- // Attempt to convert the defining instruction to set the "zero" flag.
- if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+ // Attempt to optimize the comparison instruction.
+ if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
++NumEliminated;
return true;
}
@@ -255,7 +266,53 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
return false;
}
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isMoveImmediate())
+ return false;
+ if (TID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePeephole)
+ return false;
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -264,22 +321,50 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+ bool First = true;
+ MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
- MII = I->begin(), ME = I->end(); MII != ME; ) {
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ LocalMIs.insert(MI);
- if (MI->getDesc().isCompare() &&
- !MI->getDesc().hasUnmodeledSideEffects()) {
- ++MII; // The iterator may become invalid if the compare is deleted.
- Changed |= OptimizeCmpInstr(MI, MBB);
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ ++MII;
+ continue;
+ }
+
+ if (MI->getDesc().isCompare()) {
+ if (OptimizeCmpInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
} else {
Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
- ++MII;
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
+
+ First = false;
+ PMII = MII;
+ ++MII;
}
}
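
The First/PMII bookkeeping in the peephole loop above is the subtle part: when OptimizeCmpInstr deletes the current instruction, the scan resumes from the element after the previously visited one. The same pattern in standalone form, with std::list standing in for the basic block's instruction list:

#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> block = {1, -2, 3, -4, 5}; // negatives play the deleted MIs
  bool First = true;
  std::list<int>::iterator PMII = block.end(); // previously visited survivor

  for (std::list<int>::iterator MII = block.begin(); MII != block.end(); ) {
    if (*MII < 0) {                        // "the optimization deleted MI"
      block.erase(MII);                    // only MII is invalidated
      MII = First ? block.begin() : std::next(PMII);
      continue;                            // resume from a known-valid point
    }
    First = false;
    PMII = MII;
    ++MII;
  }

  for (std::list<int>::iterator I = block.begin(); I != block.end(); ++I)
    std::cout << *I << ' ';
  std::cout << '\n';                       // prints: 1 3 5
  return 0;
}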
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index f0bd6d1..60c24b7 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -133,18 +133,12 @@ namespace {
std::vector<unsigned> KillIndices;
public:
- SchedulePostRATDList(MachineFunction &MF,
- const MachineLoopInfo &MLI,
- const MachineDominatorTree &MDT,
- ScheduleHazardRecognizer *HR,
- AntiDepBreaker *ADB,
- AliasAnalysis *aa)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- HazardRec(HR), AntiDepBreak(ADB), AA(aa),
- KillIndices(TRI->getNumRegs()) {}
-
- ~SchedulePostRATDList() {
- }
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
@@ -183,9 +177,34 @@ namespace {
};
}
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+ KillIndices(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- AA = &getAnalysis<AliasAnalysis>();
TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
@@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "PostRAScheduler\n");
- const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- const TargetMachine &TM = Fn.getTarget();
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- ScheduleHazardRecognizer *HR =
- TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
- AntiDepBreaker *ADB =
- ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
-
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+ CriticalPathRCs);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.FixupKills(MBB);
}
- delete HR;
- delete ADB;
-
return true;
}
@@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
MinDepth = PendingQueue[i]->getDepth();
}
- DEBUG(dbgs() << "\n*** Examining Available\n";
- LatencyPriorityQueue q = AvailableQueue;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(this);
- });
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
SUnit *FoundSUnit = 0;
bool HasNoopHazards = false;
@@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
SUnit *CurSUnit = AvailableQueue.pop();
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
diff --git a/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp
index cd9d83e..d6e31da 100644
--- a/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp
+++ b/contrib/llvm/lib/CodeGen/PreAllocSplitting.cpp
@@ -91,8 +91,9 @@ namespace {
public:
static char ID;
- PreAllocSplitting()
- : MachineFunctionPass(ID) {}
+ PreAllocSplitting() : MachineFunctionPass(ID) {
+ initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -106,10 +107,8 @@ namespace {
AU.addPreserved<LiveStacks>();
AU.addPreserved<RegisterCoalescer>();
AU.addPreserved<CalculateSpillWeights>();
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
@@ -203,9 +202,18 @@ namespace {
char PreAllocSplitting::ID = 0;
-INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting",
+INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
+ "Pre-Register Allocation Live Interval Splitting",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
"Pre-Register Allocation Live Interval Splitting",
- false, false);
+ false, false)
char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
@@ -324,7 +332,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
LSs->getVNInfoAllocator());
return SS;
}
@@ -585,7 +593,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
VNInfo *RetVNI = Phis[MBB] =
- LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
+ LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
LIs->getVNInfoAllocator());
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
@@ -674,7 +682,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DefIdx = DefIdx.getDefIndex();
assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
- VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
// If the def is a move, set the copy field.
if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
@@ -807,7 +815,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
+ if (!DefMI || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
@@ -872,7 +880,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
LSs->getVNInfoAllocator());
}
@@ -967,8 +975,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
- MachineInstr *DefMI = ValNo->isDefAccurate()
- ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+ MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
// If this would create a new join point, do not split.
if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
@@ -1005,7 +1012,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SlotIndex SpillIndex;
MachineInstr *SpillMI = NULL;
int SS = -1;
- if (!ValNo->isDefAccurate()) {
+ if (!DefMI) {
// If we don't know where the def is we must split just before the barrier.
if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
BarrierMBB, SS, RefsInMBB))) {
@@ -1199,12 +1206,12 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// We also don't try to handle the results of PHI joins, since there's
// no defining instruction to analyze.
- if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+ if (!DefMI || CurrVN->isUnused()) continue;
// We're only interested in eliminating cruft introduced by the splitter,
// is of the form load-use or load-use-store. First, check that the
// definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
int FrameIndex;
if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index b8831db..9cd9941 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,11 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
-INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
- "Process Implicit Definitions.", false, false);
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index e2802c1..ad7b6e4 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,6 +21,7 @@
#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -29,7 +30,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -44,8 +45,12 @@ using namespace llvm;
char PEI::ID = 0;
-INITIALIZE_PASS(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false);
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
@@ -61,6 +66,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
@@ -71,7 +78,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make some adjustments to the function
// e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
- TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
// Scan the function for modified callee saved registers and insert spill code
// for any callee saved registers that are modified.
@@ -91,7 +98,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TRI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -138,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
@@ -165,7 +173,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
FrameSDOps.push_back(I);
} else if (I->isInlineAsm()) {
       // Some inline asm's need a stack frame, as indicated by the
       // IsAlignStack bit in the extra-info operand.
- if (I->getOperand(1).getImm())
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
AdjustsStack = true;
}
@@ -180,7 +189,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
- if (RegInfo->canSimplifyCallFramePseudos(Fn))
+ if (TFI->canSimplifyCallFramePseudos(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
@@ -190,7 +199,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
@@ -229,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return; // Early exit if no callee saved registers are modified!
unsigned NumFixedSpillSlots;
- const TargetFrameInfo::SpillSlot *FixedSpillSlots =
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
// Now that we know which registers need to be saved and restored, allocate
@@ -247,7 +256,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
// Check to see if this physreg must be spilled to a particular stack slot
// on this target.
- const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
FixedSlot->Reg != Reg)
++FixedSlot;
@@ -290,13 +299,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
return;
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
if (! ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
- if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in.
// It's killed at the spill.
@@ -328,7 +338,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
     // terminators that precede it.
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -480,10 +490,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -549,7 +559,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+ if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -631,17 +641,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
- if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
- if (!RegInfo->targetHandlesStackFrameRounding()) {
+ if (!TFI.targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
Offset += MFI->getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
@@ -672,16 +682,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
/// prolog and epilog code to the function.
///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
// Add prologue to the function...
- TRI->emitPrologue(Fn);
+ TFI.emitPrologue(Fn);
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
if (!I->empty() && I->back().getDesc().isReturn())
- TRI->emitEpilogue(Fn, *I);
+ TFI.emitEpilogue(Fn, *I);
}
}
@@ -694,9 +704,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetFrameInfo *TFI = TM.getFrameInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
bool StackGrowsDown =
- TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
@@ -755,8 +765,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TRI.eliminateFrameIndex(MI, SPAdj,
- FrameIndexVirtualScavenging ? NULL : RS);
+ TRI.eliminateFrameIndex(MI, SPAdj,
+ FrameIndexVirtualScavenging ? NULL : RS);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -825,7 +835,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
++NumScavengedRegs;
}
- // replace this reference to the virtual register with the
+ // Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
MI->getOperand(i).setReg(ScratchReg);
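
Taken together, the hunks above are one mechanical migration: frame-related hooks (hasFP, hasReservedCallFrame, stack growth direction, prologue/epilogue emission, callee-saved spill/restore) move from TargetRegisterInfo to the new TargetFrameLowering interface obtained via TargetMachine::getFrameLowering(). A condensed fragment showing the new call shape (only meaningful inside the LLVM tree; the helper function itself is hypothetical):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetFrameLowering.h"
    #include "llvm/Target/TargetMachine.h"

    // Hypothetical helper: how a pass now reaches the frame-lowering hooks.
    static bool stackGrowsDown(const llvm::MachineFunction &Fn) {
      const llvm::TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
      // TargetFrameInfo::StackGrowsDown became TargetFrameLowering::StackGrowsDown,
      // and queries such as RegInfo->hasFP(Fn) became TFI->hasFP(Fn).
      return TFI->getStackGrowthDirection() ==
             llvm::TargetFrameLowering::StackGrowsDown;
    }
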
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
index d575124..e239159 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,9 @@ namespace llvm {
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(ID) {}
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
return "Prolog/Epilog Insertion & Frame Finalization";
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 5e86e5a..73b66d8 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <map>
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 0000000..8c7e5f5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,181 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity and generated code performance are
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
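
A toy, standalone model of the first principle (this is not the LiveIntervalUnion implementation, just an illustration of why interval-based interference checks are cheap): if both the candidate virtual register and the set of intervals already assigned to a physical register are kept as sorted, non-overlapping segment lists, a single merge-style sweep decides interference.

    #include <cstddef>
    #include <vector>

    // Toy model only: a segment is a half-open [Start, Stop) range of slots.
    struct Segment { unsigned Start, Stop; };

    // Both inputs are sorted by Start and internally non-overlapping, like a
    // LiveInterval and the union of intervals already assigned to a physreg.
    static bool interferes(const std::vector<Segment> &VirtReg,
                           const std::vector<Segment> &PhysUnion) {
      std::size_t i = 0, j = 0;
      while (i != VirtReg.size() && j != PhysUnion.size()) {
        if (VirtReg[i].Stop <= PhysUnion[j].Start)
          ++i;                    // virtreg segment ends before union segment
        else if (PhysUnion[j].Stop <= VirtReg[i].Start)
          ++j;                    // union segment ends before virtreg segment
        else
          return true;            // the segments overlap: interference
      }
      return false;               // no overlapping pair found
    }
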
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include <queue>
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also implement getPriority(), which is
+/// typically just the spill weight computed by CalculateSpillWeights.
+class RegAllocBase {
+ LiveIntervalUnion::Allocator UnionAllocator;
+protected:
+ // Array of LiveIntervalUnions indexed by physical register.
+ class LiveUnionArray {
+ unsigned NumRegs;
+ LiveIntervalUnion *Array;
+ public:
+ LiveUnionArray(): NumRegs(0), Array(0) {}
+ ~LiveUnionArray() { clear(); }
+
+ unsigned numRegs() const { return NumRegs; }
+
+ void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned PhysReg) {
+ assert(PhysReg < NumRegs && "physReg out of bounds");
+ return Array[PhysReg];
+ }
+ };
+
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ LiveUnionArray PhysReg2LiveUnion;
+
+ // Current queries, one per physreg. They must be reinitialized each time we
+ // query on a new live virtual register.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+  // Get an initialized query to check interferences between VirtReg and
+  // PhysReg. Note that Query::init must be called at least once for each
+  // physical register before querying a new live virtual register. This ties
+  // Queries and PhysReg2LiveUnion together.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+ Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]);
+ return Queries[PhysReg];
+ }
+
+  // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ //
+ // If an implementation wants to override the LiveInterval comparator, we
+ // should modify this interface to allow passing in an instance derived from
+ // LiveVirtRegQueue.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ // getPriority - Calculate the allocation priority for VirtReg.
+ // Virtual registers with higher priorities are allocated first.
+ virtual float getPriority(LiveInterval *LI) = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+  // Each call must guarantee forward progress by returning an available
+  // PhysReg or a new set of split live virtual registers. It is up to the
+  // splitter to converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // A RegAlloc pass should call this when PassManager releases its memory.
+ virtual void releaseMemory();
+
+  // Helper for checking interference between a live virtual register and a
+  // physical register, including all its register aliases. If an interference
+  // exists, return the interfering register, which may be PhysReg or an
+  // alias.
+ unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+ /// assign - Assign VirtReg to PhysReg.
+ /// This should not be called from selectOrSplit for the current register.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+ /// This can be invoked from selectOrSplit, but be careful to guarantee that
+ /// allocation is making progress.
+ void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+  // Helper for spilling all live virtual registers currently unified under
+  // PhysReg that interfere with the most recently queried VirtReg. Return
+  // true if spilling was successful, and append any new spilled/split
+  // intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// addMBBLiveIns - Add physreg liveins to basic blocks.
+ void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+ // Verify each LiveIntervalUnion.
+ void verify();
+#endif
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&);
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 0000000..045c8db
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,523 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "RegAllocBase.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+ BitVector ReservedRegs;
+
+ // analyses
+ LiveStacks *LS;
+ RenderMachineFunction *RMF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ DEBUG(AU.addRequired<RenderMachineFunction>());
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs());
+  // Cache an interference query for each physical reg.
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
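
Because LiveIntervalUnion takes constructor arguments and LiveUnionArray sizes itself at runtime, init() allocates raw storage and placement-news each element, while clear() runs each destructor before freeing, as above. A standalone miniature of that idiom (the Widget type is made up for illustration):

    #include <cstdlib>
    #include <new>

    struct Widget {
      unsigned Id;
      explicit Widget(unsigned Id) : Id(Id) {}
    };

    int main() {
      const unsigned N = 4;
      // Raw allocation plus placement-new, mirroring LiveUnionArray::init().
      Widget *A = static_cast<Widget*>(std::malloc(sizeof(Widget) * N));
      for (unsigned i = 0; i != N; ++i)
        new (A + i) Widget(i);

      // Explicit destructor calls before free(), mirroring clear().
      for (unsigned i = 0; i != N; ++i)
        A[i].~Widget();
      std::free(A);
      return 0;
    }
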
+
+void RegAllocBase::releaseMemory() {
+ PhysReg2LiveUnion.clear();
+}
+
+// Visit all the live virtual registers. If they are already assigned to a
+// physical register, unify them with the corresponding LiveIntervalUnion,
+// otherwise push them on the priority queue for later assignment.
+void RegAllocBase::
+seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) {
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum));
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+
+ // Push each vreg onto a queue or "precolor" by adding it to a physreg union.
+ std::priority_queue<std::pair<float, unsigned> > VirtRegQ;
+ seedLiveVirtRegs(VirtRegQ);
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (!VirtRegQ.empty()) {
+ // Pop the highest priority vreg.
+ LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second);
+ VirtRegQ.pop();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName()
+ << ':' << VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg)
+ assign(VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval* SplitVirtReg = *I;
+ if (SplitVirtReg->empty()) continue;
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg),
+ SplitVirtReg->reg));
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Helper for spillInterferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+ assert(Q.seenAllInterferences() && "need collectInterferences()");
+ const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+ for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+ E = PendingSpills.end(); I != E; ++I) {
+ LiveInterval &SpilledVReg = **I;
+ DEBUG(dbgs() << "extracting from " <<
+ TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ unassign(SpilledVReg, PhysReg);
+
+ // Spill the extracted interval.
+ spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills);
+ }
+ // After extracting segments, the query's results are invalid. But keep the
+ // contents valid until we're done accessing pendingSpills.
+ Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ unsigned NumInterferences = 0;
+ // Collect interferences assigned to any alias of the physical register.
+ for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+ NumInterferences += QAlias.collectInterferingVRegs();
+ if (QAlias.seenUnspillableVReg()) {
+ return false;
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(NumInterferences > 0 && "expect interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ spillReg(VirtReg, *AliasI, SplitVRegs);
+ return true;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ typedef SmallVector<MachineBasicBlock*, 8> MBBVec;
+ MBBVec liveInMBBs;
+ MachineBasicBlock &entryMBB = *MF->begin();
+
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid();
+ ++SI) {
+
+ // Find the set of basic blocks which this range is live into...
+ liveInMBBs.clear();
+ if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue;
+
+ // And add the physreg for this interval to their live-in sets.
+ for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ if (MBB == &entryMBB) continue;
+ if (MBB->isLiveIn(PhysReg)) continue;
+ MBB->addLiveIn(PhysReg);
+ }
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we
+// find an available register. So, the number of interference tests in the
+// worst case is |vregs| * |machineregs|. And since the number of interference
+// tests is minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg);
+
+ for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF),
+ E = TRC->allocation_order_end(*MF);
+ I != E; ++I) {
+
+ unsigned PhysReg = *I;
+ if (ReservedRegs.test(PhysReg)) continue;
+
+    // Check interference and, as a side effect, initialize queries for this
+ // VirtReg and its aliases.
+ unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+ if (interfReg == 0) {
+ // Found an available register.
+ return PhysReg;
+ }
+ LiveInterval *interferingVirtReg =
+ Queries[interfReg].firstInterference().liveUnionPos().value();
+
+ // The current VirtReg must either be spillable, or one of its interferences
+ // must have less spill weight.
+ if (interferingVirtReg->weight < VirtReg.weight ) {
+ PhysRegSpillCands.push_back(PhysReg);
+ }
+ }
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+ assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ SmallVector<LiveInterval*, 1> pendingSpills;
+
+ spiller().spill(&VirtReg, SplitVRegs, pendingSpills);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+
+ ReservedRegs = TRI->getReservedRegs(*MF);
+
+ SpillerInstance.reset(createSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ addMBBLiveIns(MF);
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ // optional HTML output
+ DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+ // FIXME: Verification currently must run before VirtRegRewriter. We should
+ // make the rewriter a separate pass and override verifyAnalysis instead. When
+ // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+ if (VerifyEnabled) {
+ // Verify accuracy of LiveIntervals. The standard machine code verifier
+    // ensures that each LiveInterval covers all uses of the virtual reg.
+
+ // FIXME: MachineVerifier is badly broken when using the standard
+ // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+ // inline spiller, some tests fail to verify because the coalescer does not
+ // always generate verifiable code.
+ MF->verify(this, "In RABasic::verify");
+
+ // Verify that LiveIntervals are partitioned into unions and disjoint within
+ // the unions.
+ verify();
+ }
+#endif // !NDEBUG
+
+ // Run rewriter
+ VRM->rewrite(LIS->getSlotIndexes());
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
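
Since the allocator registers itself under the name "basic" via RegisterRegAlloc and exposes the -verify-regalloc flag above, it can presumably be selected from llc along these lines (test.ll is a placeholder input):

    llc -regalloc=basic -verify-regalloc test.ll
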
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fc150d5..15036e3 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -48,7 +48,10 @@ namespace {
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {}
+ isBulkSpilling(false) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
private:
const TargetMachine *TM;
MachineFunction *MF;
@@ -259,8 +262,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling %reg" << LRI->first
- << " in " << TRI->getName(LR.PhysReg));
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
int FI = getStackSpaceFor(LRI->first, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
@@ -331,7 +334,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
MO.setIsKill();
return;
default:
- // The physreg was allocated to a virtual register. That means to value we
+ // The physreg was allocated to a virtual register. That means the value we
// wanted has been clobbered.
llvm_unreachable("Instruction uses an allocated register");
}
@@ -458,8 +461,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
/// register must not be used for anything else when this is called.
///
void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
- << TRI->getName(PhysReg) << "\n");
+ DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
PhysRegState[PhysReg] = LRE.first;
assert(!LRE.second.PhysReg && "Already assigned a physreg");
LRE.second.PhysReg = PhysReg;
@@ -503,8 +506,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
return assignVirtToPhysReg(LRE, PhysReg);
}
- DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
- << "\n");
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
@@ -584,8 +587,8 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
allocVirtReg(MI, *LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
- << TRI->getName(LR.PhysReg) << "\n");
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LR.PhysReg, TRI) << "\n");
TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
++NumLoads;
} else if (LR.Dirty) {
@@ -653,11 +656,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
(MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
if (ThroughRegs.insert(Reg))
- DEBUG(dbgs() << " %reg" << Reg);
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
}
}
@@ -685,7 +689,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
unsigned DefIdx = 0;
if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
@@ -731,6 +735,27 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
void RAFast::AllocateBasicBlock() {
DEBUG(dbgs() << "\nAllocating " << *MBB);
+ // FIXME: This should probably be added by instruction selection instead?
+ // If the last instruction in the block is a return, make sure to mark it as
+ // using all of the live-out values in the function. Things marked both call
+ // and return are tail calls; do not do this for them. The tail callee need
+ // not take the same registers as input that it produces as output, and there
+ // are dependencies for its input registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+ !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register.");
+
+ // Add live-out registers as implicit uses.
+ Ret->addRegisterKilled(*I, TRI, true);
+ }
+ }
+
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
   assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
@@ -761,7 +786,7 @@ void RAFast::AllocateBasicBlock() {
dbgs() << "*";
break;
default:
- dbgs() << "=%reg" << PhysRegState[Reg];
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
dbgs() << "*";
assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
@@ -791,16 +816,18 @@ void RAFast::AllocateBasicBlock() {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
LiveDbgValueMap[Reg] = MI;
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
- if (SS == -1)
+ if (SS == -1) {
// We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
MO.setReg(0);
+ }
else {
// Modify DBG_VALUE now that the value is in a spill slot.
int64_t Offset = MI->getOperand(1).getImm();
@@ -817,9 +844,11 @@ void RAFast::AllocateBasicBlock() {
MI = NewDV;
ScanDbgValue = true;
break;
- } else
+ } else {
// We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
MO.setReg(0);
+ }
}
}
}
@@ -902,7 +931,7 @@ void RAFast::AllocateBasicBlock() {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
unsigned PhysReg = LRI->second.PhysReg;
@@ -1017,8 +1046,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
- unsigned LastVirtReg = MRI->getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 0000000..c1372cd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1285 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumReassigned, "Number of interferences reassigned");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass, public RegAllocBase {
+ // context
+ MachineFunction *MF;
+ BitVector ReservedRegs;
+
+ // analyses
+ SlotIndexes *Indexes;
+ LiveStacks *LS;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineLoopRanges *LoopRanges;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::auto_ptr<SplitAnalysis> SA;
+
+ // splitting state.
+
+ /// All basic blocks where the current register is live.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints;
+
+ /// For every instruction in SA->UseSlots, store the previous non-copy
+ /// instruction.
+ SmallVector<SlotIndex, 8> PrevSlot;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI);
+
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ bool checkUncachedInterference(LiveInterval&, unsigned);
+ LiveInterval *getSingleInterference(LiveInterval&, unsigned);
+ bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg);
+ float calcInterferenceWeight(LiveInterval&, unsigned);
+ float calcInterferenceInfo(LiveInterval&, unsigned);
+ float calcGlobalSplitCost(const BitVector&);
+ void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+ SmallVectorImpl<LiveInterval*>&);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ SlotIndex getPrevMappedIndex(const MachineInstr*);
+ void calcPrevSlots();
+ unsigned nextSplitPoint(unsigned);
+
+ unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySpillInterferences(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineLoopRanges>();
+ AU.addPreserved<MachineLoopRanges>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+float RAGreedy::getPriority(LiveInterval *LI) {
+ float Priority = LI->weight;
+
+ // Prioritize hinted registers so they are allocated first.
+ std::pair<unsigned, unsigned> Hint;
+ if (Hint.first || Hint.second) {
+ // The hint can be target specific, a virtual register, or a physreg.
+ Priority *= 2;
+
+ // Prefer physreg hints above anything else.
+ if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ Priority *= 2;
+ }
+ return Priority;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register Reassignment
+//===----------------------------------------------------------------------===//
+
+// Check interference without using the cache.
+bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]);
+ if (subQ.checkInterference())
+ return true;
+ }
+ return false;
+}
+
+/// getSingleInterference - Return the single interfering virtual register
+/// assigned to PhysReg. Return 0 if more than one virtual register is
+/// interfering.
+LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check physreg and aliases.
+ LiveInterval *Interference = 0;
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ if (Q.checkInterference()) {
+ if (Interference)
+ return 0;
+ Q.collectInterferingVRegs(1);
+ if (!Q.seenAllInterferences())
+ return 0;
+ Interference = Q.interferingVRegs().front();
+ }
+ }
+ return Interference;
+}
+
+// Attempt to reassign this virtual register to a different physical register.
+//
+// FIXME: we are not yet caching these "second-level" interferences discovered
+// in the sub-queries. These interferences can change with each call to
+// selectOrSplit. However, we could implement a "may-interfere" cache that
+// could be conservatively dirtied when we reassign or split.
+//
+// FIXME: This may result in a lot of alias queries. We could summarize alias
+// live intervals in their parent register's live union, but it's messy.
+bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
+ unsigned WantedPhysReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) &&
+ "Can only reassign virtual registers");
+  assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) &&
+         "inconsistent phys reg assignment");
+
+ AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs);
+ while (unsigned PhysReg = Order.next()) {
+ // Don't reassign to a WantedPhysReg alias.
+ if (TRI->regsOverlap(PhysReg, WantedPhysReg))
+ continue;
+
+ if (checkUncachedInterference(InterferingVReg, PhysReg))
+ continue;
+
+ // Reassign the interfering virtual reg to this physical reg.
+ unsigned OldAssign = VRM->getPhys(InterferingVReg.reg);
+ DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " <<
+ TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n');
+ unassign(InterferingVReg, OldAssign);
+ assign(InterferingVReg, PhysReg);
+ ++NumReassigned;
+ return true;
+ }
+ return false;
+}
+
+/// tryReassignOrEvict - Try to reassign a single interference to a different
+/// physreg, or evict a single interference with a lower spill weight.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs){
+ NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the lightest single interference seen so far.
+ float BestWeight = VirtReg.weight;
+ LiveInterval *BestVirt = 0;
+ unsigned BestPhys = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
+ if (!InterferingVReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg))
+ continue;
+ if (reassignVReg(*InterferingVReg, PhysReg))
+ return PhysReg;
+
+ // Cannot reassign, is this an eviction candidate?
+ if (InterferingVReg->weight < BestWeight) {
+ BestVirt = InterferingVReg;
+ BestPhys = PhysReg;
+ BestWeight = InterferingVReg->weight;
+ }
+ }
+
+ // Nothing reassigned, can we evict a lighter single interference?
+ if (BestVirt) {
+ DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n');
+ unassign(*BestVirt, VRM->getPhys(BestVirt->reg));
+ ++NumEvicted;
+ NewVRegs.push_back(BestVirt);
+ return BestPhys;
+ }
+
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints
+/// when considering interference from PhysReg. Also compute an optimistic local
+/// cost of this interference pattern.
+///
+/// The final cost of a split is the local cost + global cost of preferences
+/// broken by SpillPlacement.
+///
+float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
+ // Reset interference dependent info.
+ SpillConstraints.resize(SA->LiveBlocks.size());
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ BC.Number = BI.MBB->getNumber();
+ BC.Entry = (BI.Uses && BI.LiveIn) ?
+ SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.Uses && BI.LiveOut) ?
+ SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BI.OverlapEntry = BI.OverlapExit = false;
+ }
+
+ // Add interference info from each PhysReg alias.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(VirtReg, *AI).checkInterference())
+ continue;
+ LiveIntervalUnion::SegmentIter IntI =
+ PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+ if (!IntI.valid())
+ continue;
+
+ // Determine which blocks have interference live in or after the last split
+ // point.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // Is the interference live-in?
+ if (BI.LiveIn) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() <= Start)
+ BC.Entry = SpillPlacement::MustSpill;
+ }
+
+ // Is the interference overlapping the last split point?
+ if (BI.LiveOut) {
+ if (IntI.stop() < BI.LastSplitPoint)
+ IntI.advanceTo(BI.LastSplitPoint.getPrevSlot());
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < Stop)
+ BC.Exit = SpillPlacement::MustSpill;
+ }
+ }
+
+ // Rewind iterator and check other interferences.
+ IntI.find(VirtReg.beginIndex());
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // Handle transparent blocks with interference separately.
+ // Transparent blocks never incur any fixed cost.
+ if (BI.LiveThrough && !BI.Uses) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() >= Stop)
+ continue;
+
+ if (BC.Entry != SpillPlacement::MustSpill)
+ BC.Entry = SpillPlacement::PrefSpill;
+ if (BC.Exit != SpillPlacement::MustSpill)
+ BC.Exit = SpillPlacement::PrefSpill;
+ continue;
+ }
+
+ // Now we only have blocks with uses left.
+ // Check if the interference overlaps the uses.
+ assert(BI.Uses && "Non-transparent block without any uses");
+
+ // Check interference on entry.
+ if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ // Interference is not live-in, but it starts before the first use.
+ if (IntI.start() < BI.FirstUse)
+ BC.Entry = SpillPlacement::PrefSpill;
+ }
+
+ // Does interference overlap the uses in the entry segment
+ // [FirstUse;Kill)?
+ if (BI.LiveIn && !BI.OverlapEntry) {
+ IntI.advanceTo(BI.FirstUse);
+ if (!IntI.valid())
+ break;
+ // A live-through interval has no kill.
+ // Check [FirstUse;LastUse) instead.
+ if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+ BI.OverlapEntry = true;
+ }
+
+ // Does interference overlap the uses in the exit segment [Def;LastUse)?
+ if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) {
+ IntI.advanceTo(BI.Def);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < BI.LastUse)
+ BI.OverlapExit = true;
+ }
+
+ // Check interference on exit.
+ if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) {
+ // Check interference between LastUse and Stop.
+ if (BC.Exit != SpillPlacement::PrefSpill) {
+ IntI.advanceTo(BI.LastUse);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < Stop)
+ BC.Exit = SpillPlacement::PrefSpill;
+ }
+ }
+ }
+ }
+
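+ // At this point, OverlapEntry/OverlapExit record whether interference
+ // overlaps the entry/exit use segments, i.e. whether spill code inside the
+ // block is unavoidable when using PhysReg.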
+ // Accumulate a local cost of this interference pattern.
+ float LocalCost = 0;
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ if (!BI.Uses)
+ continue;
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ unsigned Inserts = 0;
+
+ // Do we need spill code for the entry segment?
+ if (BI.LiveIn)
+ Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg;
+
+ // For the exit segment?
+ if (BI.LiveOut)
+ Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg;
+
+ // The local cost of spill code in this block is the block frequency times
+ // the number of spill instructions inserted.
+ if (Inserts)
+ LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB);
+ }
+ DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = "
+ << LocalCost << '\n');
+ return LocalCost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SpillConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
+ float GlobalCost = 0;
+ for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) {
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ unsigned Inserts = 0;
+ // Broken entry preference?
+ Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] !=
+ (BC.Entry == SpillPlacement::PrefReg);
+ // Broken exit preference?
+ Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] !=
+ (BC.Exit == SpillPlacement::PrefReg);
+ if (Inserts)
+ GlobalCost +=
+ Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB);
+ }
+ DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n');
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed by
+/// SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
+ const BitVector &LiveBundles,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ DEBUG({
+ dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+ << " with bundles";
+ for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+
+ // First compute interference ranges in the live blocks.
+ typedef std::pair<SlotIndex, SlotIndex> IndexPair;
+ SmallVector<IndexPair, 8> InterferenceRanges;
+ InterferenceRanges.resize(SA->LiveBlocks.size());
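+ // For each live block, record the start of the first interference (live-in
+ // blocks) and the end of the last interference (live-out blocks). An
+ // invalid index means that end of the block is interference-free.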
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(VirtReg, *AI).checkInterference())
+ continue;
+ LiveIntervalUnion::SegmentIter IntI =
+ PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+ if (!IntI.valid())
+ continue;
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // First interference in block.
+ if (BI.LiveIn) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() >= Stop)
+ continue;
+ if (!IP.first.isValid() || IntI.start() < IP.first)
+ IP.first = IntI.start();
+ }
+
+ // Last interference in block.
+ if (BI.LiveOut) {
+ IntI.advanceTo(Stop);
+ if (!IntI.valid() || IntI.start() >= Stop)
+ --IntI;
+ if (IntI.stop() <= Start)
+ continue;
+ if (!IP.second.isValid() || IntI.stop() > IP.second)
+ IP.second = IntI.stop();
+ }
+ }
+ }
+
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+ // Create the main cross-block interval.
+ SE.openIntv();
+
+ // First add all defs that are live out of a block.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Should the register be live out?
+ if (!BI.LiveOut || !RegOut)
+ continue;
+
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+ << Bundles->getBundle(BI.MBB->getNumber(), 1)
+ << " intf [" << IP.first << ';' << IP.second << ')');
+
+ // The interference interval should either be invalid or overlap MBB.
+ assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
+ assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+
+ // Check interference leaving the block.
+ if (!IP.second.isValid()) {
+ // Block is interference-free.
+ DEBUG(dbgs() << ", no interference");
+ if (!BI.Uses) {
+ assert(BI.LiveThrough && "No uses, but not live through block?");
+ // Block is live-through without interference.
+ DEBUG(dbgs() << ", no uses"
+ << (RegIn ? ", live-through.\n" : ", stack in.\n"));
+ if (!RegIn)
+ SE.enterIntvAtEnd(*BI.MBB);
+ continue;
+ }
+ if (!BI.LiveThrough) {
+ DEBUG(dbgs() << ", not live-through.\n");
+ SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+ continue;
+ }
+ if (!RegIn) {
+ // Block is live-through, but entry bundle is on the stack.
+ // Reload just before the first use.
+ DEBUG(dbgs() << ", not live-in, enter before first use.\n");
+ SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+ continue;
+ }
+ DEBUG(dbgs() << ", live-through.\n");
+ continue;
+ }
+
+ // Block has interference.
+ DEBUG(dbgs() << ", interference to " << IP.second);
+
+ if (!BI.LiveThrough && IP.second <= BI.Def) {
+ // The interference doesn't reach the outgoing segment.
+ DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
+ SE.useIntv(BI.Def, Stop);
+ continue;
+ }
+
+
+ if (!BI.Uses) {
+ // No uses in block, avoid interference by reloading as late as possible.
+ DEBUG(dbgs() << ", no uses.\n");
+ SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ continue;
+ }
+
+ if (IP.second.getBoundaryIndex() < BI.LastUse) {
+ // There are interference-free uses at the end of the block.
+ // Find the first use that can get the live-out register.
+ SmallVectorImpl<SlotIndex>::const_iterator UI =
+ std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+ IP.second.getBoundaryIndex());
+ assert(UI != SA->UseSlots.end() && "Couldn't find last use");
+ SlotIndex Use = *UI;
+ assert(Use <= BI.LastUse && "Couldn't find last use");
+ // Only attempt a split before the last split point.
+ if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+ DEBUG(dbgs() << ", free use at " << Use << ".\n");
+ SlotIndex SegStart = SE.enterIntvBefore(Use);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ assert(SegStart < BI.LastSplitPoint && "Impossible split point");
+ SE.useIntv(SegStart, Stop);
+ continue;
+ }
+ }
+
+ // Interference is after the last use.
+ DEBUG(dbgs() << " after last use.\n");
+ SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ }
+
+ // Now all defs leading to live bundles are handled, do everything else.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Is the register live-in?
+ if (!BI.LiveIn || !RegIn)
+ continue;
+
+ // We have an incoming register. Check for interference.
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
+ << " -> BB#" << BI.MBB->getNumber());
+
+ // Check interference entering the block.
+ if (!IP.first.isValid()) {
+ // Block is interference-free.
+ DEBUG(dbgs() << ", no interference");
+ if (!BI.Uses) {
+ assert(BI.LiveThrough && "No uses, but not live through block?");
+ // Block is live-through without interference.
+ if (RegOut) {
+ DEBUG(dbgs() << ", no uses, live-through.\n");
+ SE.useIntv(Start, Stop);
+ } else {
+ DEBUG(dbgs() << ", no uses, stack-out.\n");
+ SE.leaveIntvAtTop(*BI.MBB);
+ }
+ continue;
+ }
+ if (!BI.LiveThrough) {
+ DEBUG(dbgs() << ", killed in block.\n");
+ SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill));
+ continue;
+ }
+ if (!RegOut) {
+ // Block is live-through, but exit bundle is on the stack.
+ // Spill immediately after the last use.
+ if (BI.LastUse < BI.LastSplitPoint) {
+ DEBUG(dbgs() << ", uses, stack-out.\n");
+ SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse));
+ continue;
+ }
+ // The last use is after the last split point; it is probably an
+ // indirect jump.
+ DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
+ << BI.LastSplitPoint << ", stack-out.\n");
+ SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint);
+ SE.useIntv(Start, SegEnd);
+ // Run a double interval from the split to the last use.
+ // This makes it possible to spill the complement without affecting the
+ // indirect branch.
+ SE.overlapIntv(SegEnd, BI.LastUse);
+ continue;
+ }
+ // Register is live-through.
+ DEBUG(dbgs() << ", uses, live-through.\n");
+ SE.useIntv(Start, Stop);
+ continue;
+ }
+
+ // Block has interference.
+ DEBUG(dbgs() << ", interference from " << IP.first);
+
+ if (!BI.LiveThrough && IP.first >= BI.Kill) {
+ // The interference doesn't reach the incoming segment.
+ DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
+ SE.useIntv(Start, BI.Kill);
+ continue;
+ }
+
+ if (!BI.Uses) {
+ // No uses in block, avoid interference by spilling as soon as possible.
+ DEBUG(dbgs() << ", no uses.\n");
+ SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ continue;
+ }
+ if (IP.first.getBaseIndex() > BI.FirstUse) {
+ // There are interference-free uses at the beginning of the block.
+ // Find the last use that can get the register.
+ SmallVectorImpl<SlotIndex>::const_iterator UI =
+ std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+ IP.first.getBaseIndex());
+ assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
+ SlotIndex Use = (--UI)->getBoundaryIndex();
+ DEBUG(dbgs() << ", free use at " << *UI << ".\n");
+ SlotIndex SegEnd = SE.leaveIntvAfter(Use);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ SE.useIntv(Start, SegEnd);
+ continue;
+ }
+
+ // Interference is before the first use.
+ DEBUG(dbgs() << " before first use.\n");
+ SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ }
+
+ SE.closeIntv();
+
+ // FIXME: Should we be more aggressive about splitting the stack region into
+ // per-block segments? The current approach allows the stack region to
+ // separate into connected components. Some components may be allocatable.
+ SE.finish();
+ ++NumGlobalSplits;
+
+ if (VerifyEnabled) {
+ MF->verify(this, "After splitting live range around region");
+
+#ifndef NDEBUG
+ // Make sure that at least one of the new intervals can allocate to PhysReg.
+ // That was the whole point of splitting the live range.
+ bool found = false;
+ for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E;
+ ++I)
+ if (!checkUncachedInterference(**I, PhysReg)) {
+ found = true;
+ break;
+ }
+ assert(found && "No allocatable intervals after pointless splitting");
+#endif
+ }
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ BitVector LiveBundles, BestBundles;
+ float BestCost = 0;
+ unsigned BestReg = 0;
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ float Cost = calcInterferenceInfo(VirtReg, PhysReg);
+ if (BestReg && Cost >= BestCost)
+ continue;
+
+ SpillPlacer->placeSpills(SpillConstraints, LiveBundles);
+ // No live bundles, defer to splitSingleBlocks().
+ if (!LiveBundles.any())
+ continue;
+
+ Cost += calcGlobalSplitCost(LiveBundles);
+ if (!BestReg || Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
+ BestBundles.swap(LiveBundles);
+ }
+ }
+
+ if (!BestReg)
+ return 0;
+
+ splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
+ SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstUse to LastUse,
+ // so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
+/// before MI that has a slot index. If MI is the first mapped instruction in
+/// its block, return the block start index instead.
+///
+SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
+ assert(MI && "Missing MachineInstr");
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
+ while (I != B)
+ if (!(--I)->isDebugValue() && !I->isCopy())
+ return Indexes->getInstructionIndex(I);
+ return Indexes->getMBBStartIdx(MBB);
+}
+
+/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
+/// real non-copy instruction for each instruction in SA->UseSlots.
+///
+void RAGreedy::calcPrevSlots() {
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ PrevSlot.clear();
+ PrevSlot.reserve(Uses.size());
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
+ PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
+ }
+}
+
+/// nextSplitPoint - Find the next index j > i into SA->UseSlots such that it
+/// may be beneficial to split before UseSlots[j].
+///
+/// 0 is always a valid split point.
+unsigned RAGreedy::nextSplitPoint(unsigned i) {
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned Size = Uses.size();
+ assert(i != Size && "No split points after the end");
+ // Allow split before i when Uses[i] is not adjacent to the previous use.
+ while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
+ ;
+ return i;
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - A phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case; we simply assume
+ // that the interval is continuous from FirstUse to LastUse. We should make
+ // sure that we don't do anything illegal to such an interval, though.
+
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << '\n';
+ });
+
+ // For every use, find the previous mapped non-copy instruction.
+ // We use this to detect valid split points, and to estimate new interval
+ // sizes.
+ calcPrevSlots();
+
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB);
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+ for (unsigned i = 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+
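+ // Shrink the range from the front while the estimated new weight cannot
+ // beat the interference, otherwise try to extend it at the back, and keep
+ // the candidate with the best margin.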
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+ if (MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight.
+ //
+ // Each instruction reads and writes the register, except that the first
+ // instr doesn't read when !LiveBefore, and the last instr doesn't write
+ // when !LiveAfter.
+ //
+ // We will be inserting copies before and after, so the total number of
+ // reads and writes is 2 * EstUses.
+ //
+ const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
+ 2*(LiveBefore + LiveAfter);
+
+ // Try to guess the size of the new interval. This should be trivial,
+ // but the slot index of an inserted copy can be a lot smaller than the
+ // instruction it is inserted before if there are many dead indexes
+ // between them.
+ //
+ // We measure the distance from the instruction before SplitBefore to
+ // get a conservative estimate.
+ //
+ // The final distance can still be different if inserting copies
+ // triggers a slot index renumbering.
+ //
+ const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
+ PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ float Diff = EstWeight - MaxGap;
+ DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
+ if (Diff > 0) {
+ Shrink = false;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ SplitBefore = nextSplitPoint(SplitBefore);
+ if (SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
+ SplitAfter != e; ++SplitAfter)
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
+ continue;
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+ SE.openIntv();
+ SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]);
+ SE.useIntv(SegStart, SegStop);
+ SE.closeIntv();
+ SE.finish();
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned, and/or new live ranges in
+/// NewVRegs.
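+/// Intervals pushed onto NewVRegs are queued up for allocation in later
+/// rounds.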
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ SA->analyze(&VirtReg);
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ return tryLocalSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ // First try to split around a region spanning multiple blocks.
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Then isolate blocks with multiple uses.
+ SplitAnalysis::BlockPtrSet Blocks;
+ if (SA->getMultiUseBlocks(Blocks)) {
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks);
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ }
+
+ // Don't assign any physregs.
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceWeight - Calculate the combined spill weight of
+/// interferences when assigning VirtReg to PhysReg.
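+/// Returns HUGE_VALF if any interference is unspillable, which disqualifies
+/// PhysReg as a candidate.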
+float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){
+ float Sum = 0;
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
+ return HUGE_VALF;
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i)
+ Sum += Q.interferingVRegs()[i]->weight;
+ }
+ return Sum;
+}
+
+/// trySpillInterferences - Try to spill interfering registers instead of the
+/// current one. Only do it if the accumulated spill weight is smaller than the
+/// current spill weight.
+unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled);
+ unsigned BestPhys = 0;
+ float BestWeight = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ float Weight = calcInterferenceWeight(VirtReg, PhysReg);
+ if (Weight == HUGE_VALF || Weight >= VirtReg.weight)
+ continue;
+ if (!BestPhys || Weight < BestWeight)
+ BestPhys = PhysReg, BestWeight = Weight;
+ }
+
+ // No candidates found.
+ if (!BestPhys)
+ return 0;
+
+ // Collect all interfering registers.
+ SmallVector<LiveInterval*, 8> Spills;
+ for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+ Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end());
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *VReg = Q.interferingVRegs()[i];
+ unassign(*VReg, *AI);
+ }
+ }
+
+ // Spill them all.
+ DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight "
+ << BestWeight << '\n');
+ for (unsigned i = 0, e = Spills.size(); i != e; ++i)
+ spiller().spill(Spills[i], NewVRegs, Spills);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
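+// Allocation proceeds in stages: try a free physreg, then reassignment or
+// eviction of a single interference, then live range splitting, then
+// spilling of lighter interferences, and finally spilling VirtReg itself.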
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
+ while (unsigned PhysReg = Order.next()) {
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ return PhysReg;
+ }
+
+ // Try to reassign interferences.
+ if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Try to spill another interfering reg with less spill weight.
+ PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs);
+ if (PhysReg)
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ SmallVector<LiveInterval*, 1> pendingSpills;
+ spiller().spill(&VirtReg, NewVRegs, pendingSpills);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ ReservedRegs = TRI->getReservedRegs(*MF);
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ LoopRanges = &getAnalysis<MachineLoopRanges>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+
+ allocatePhysRegs();
+ addMBBLiveIns(MF);
+ LIS->addKillFlags();
+
+ // Run rewriter
+ {
+ NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+ VRM->rewrite(Indexes);
+ }
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
index 5c62354..b959878 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocLinearScan.cpp
@@ -12,13 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -91,6 +92,19 @@ namespace {
struct RALinScan : public MachineFunctionPass {
static char ID;
RALinScan() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(
+ *PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+
// Initialize the queue to record recently-used registers.
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
@@ -127,7 +141,6 @@ namespace {
BitVector allocatableRegs_;
BitVector reservedRegs_;
LiveIntervals* li_;
- LiveStacks* ls_;
MachineLoopInfo *loopInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
@@ -183,6 +196,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
if (StrongPHIElim)
@@ -193,12 +208,15 @@ namespace {
AU.addRequired<CalculateSpillWeights>();
if (PreSplitIntervals)
AU.addRequiredID(PreAllocSplittingID);
- AU.addRequired<LiveStacks>();
- AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(LiveStacksID);
+ AU.addPreservedID(LiveStacksID);
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequiredID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -370,8 +388,19 @@ namespace {
char RALinScan::ID = 0;
}
-INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false);
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
void RALinScan::ComputeRelatedRegClasses() {
// First pass, add all reg classes to the union, and determine at least one
@@ -402,8 +431,12 @@ void RALinScan::ComputeRelatedRegClasses() {
for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
- RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+ const TargetRegisterClass *AliasClass =
+ OneClassForEachPhysReg.lookup(*AS);
+ if (AliasClass)
+ RelatedRegClasses.unionSets(I->second, AliasClass);
+ }
}
/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
@@ -431,8 +464,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned CandReg;
{
MachineInstr *CopyMI;
- if (vni->def != SlotIndex() && vni->isDefAccurate() &&
- (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
// Defined by a copy, try to extend SrcReg forward
CandReg = CopyMI->getOperand(1).getReg();
else if (TrivCoalesceEnds &&
@@ -442,6 +474,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
CandReg = CopyMI->getOperand(0).getReg();
else
return Reg;
+
+ // If the target of the copy is a sub-register then don't coalesce.
+ if (CopyMI->getOperand(0).getSubReg())
+ return Reg;
}
if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
@@ -478,7 +514,6 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
allocatableRegs_ = tri_->getAllocatableSet(fn);
reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
- ls_ = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
// We don't run the coalescer here because we have no reason to
@@ -505,6 +540,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
// Rewrite spill code and update the PhysRegsUsed set.
rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
assert(unhandled_.empty() && "Unhandled live intervals remain!");
finalizeRegUses();
@@ -638,8 +676,6 @@ void RALinScan::linearScan() {
// Look for physical registers that end up not being allocated even though
// register allocator had to spill other registers in its register class.
- if (ls_->getNumIntervals() == 0)
- return;
if (!vrm_->FindUnusedRegisters(li_))
return;
}
@@ -784,30 +820,6 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
}
}
-/// addStackInterval - Create a LiveInterval for stack if the specified live
-/// interval has been spilled.
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
- LiveIntervals *li_,
- MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
- int SS = vrm_.getStackSlot(cur->reg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
-
- VNInfo *VNI;
- if (SI.hasAtLeastOneValue())
- VNI = SI.getValNumInfo(0);
- else
- VNI = SI.getNextValue(SlotIndex(), 0, false,
- ls_->getVNInfoAllocator());
-
- LiveInterval &RI = li_->getInterval(cur->reg);
- // FIXME: This may be overly conservative.
- SI.MergeRangesInAsValue(RI, VNI);
-}
-
/// getConflictWeight - Return the number of conflicts between cur
/// live interval and defs and uses of Reg weighted by loop depths.
static
@@ -925,13 +937,9 @@ LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
}
void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- bool isNew = DowngradedRegs.insert(Reg);
- isNew = isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, Reg));
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
- isNew = DowngradedRegs.insert(*AS);
- isNew = isNew; // Silence compiler warning.
+ for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+ bool isNew = DowngradedRegs.insert(*AS);
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Multiple reloads holding the same register?");
DowngradeMap.insert(std::make_pair(li->reg, *AS));
}
@@ -957,10 +965,11 @@ namespace {
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- DEBUG(dbgs() << "\tallocating current interval: ");
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ DEBUG(dbgs() << "\tallocating current interval from "
+ << RC->getName() << ": ");
// This is an implicitly defined live interval, just assign any register.
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
if (cur->empty()) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
@@ -984,8 +993,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if ((vni->def != SlotIndex()) && !vni->isUnused() &&
- vni->isDefAccurate()) {
+ if (!vni->isUnused()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
if (CopyMI && CopyMI->isCopy()) {
unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
@@ -1225,7 +1233,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
- addStackInterval(cur, ls_, li_, mri_, *vrm_);
if (added.empty())
return; // Early exit if all spills were folded.
@@ -1300,7 +1307,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
spiller_->spill(sli, added, spillIs);
- addStackInterval(sli, ls_, li_, mri_, *vrm_);
spilled.insert(sli->reg);
}
@@ -1419,8 +1425,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
// Resolve second part of the hint (if possible) given the current allocation.
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
physReg = vrm_->getPhys(physReg);
TargetRegisterClass::iterator I, E;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 61f337b..ea0d1fe 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,9 +31,6 @@
#define DEBUG_TYPE "regalloc"
-#include "PBQP/HeuristicSolver.h"
-#include "PBQP/Graph.h"
-#include "PBQP/Heuristics/Briggs.h"
#include "RenderMachineFunction.h"
#include "Splitter.h"
#include "VirtRegMap.h"
@@ -41,9 +38,13 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Support/Debug.h"
@@ -51,7 +52,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <limits>
-#include <map>
#include <memory>
#include <set>
#include <vector>
@@ -60,7 +60,7 @@ using namespace llvm;
static RegisterRegAlloc
registerPBQPRepAlloc("pbqp", "PBQP register allocator",
- llvm::createPBQPRegisterAllocator);
+ createDefaultPBQPRegisterAllocator);
static cl::opt<bool>
pbqpCoalescing("pbqp-coalescing",
@@ -69,698 +69,471 @@ pbqpCoalescing("pbqp-coalescing",
static cl::opt<bool>
pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-splite before PBQP register allocation."),
+ cl::desc("Pre-split before PBQP register allocation."),
cl::init(false), cl::Hidden);
namespace {
- ///
- /// PBQP based allocators solve the register allocation problem by mapping
- /// register allocation problems to Partitioned Boolean Quadratic
- /// Programming problems.
- class PBQPRegAlloc : public MachineFunctionPass {
- public:
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
+ : MachineFunctionPass(ID), builder(b) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
- static char ID;
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
- /// Construct a PBQP register allocator.
- PBQPRegAlloc() : MachineFunctionPass(ID) {}
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
- /// Return the pass name.
- virtual const char* getPassName() const {
- return "PBQP Register Allocator";
- }
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
- /// PBQP analysis usage.
- virtual void getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- //au.addRequiredID(SplitCriticalEdgesID);
- au.addRequired<RegisterCoalescer>();
- au.addRequired<CalculateSpillWeights>();
- au.addRequired<LiveStacks>();
- au.addPreserved<LiveStacks>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
- au.addRequired<VirtRegMap>();
- au.addRequired<RenderMachineFunction>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
+private:
- /// Perform register allocation
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+ typedef std::set<unsigned> RegSet;
- private:
- class LIOrdering {
- public:
- bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
- return li1->reg < li2->reg;
- }
- };
-
- typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
- typedef std::vector<const LiveInterval*> Node2LIMap;
- typedef std::vector<unsigned> AllowedSet;
- typedef std::vector<AllowedSet> AllowedSetMap;
- typedef std::set<unsigned> RegSet;
- typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
-
- typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
-
- typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
-
- MachineFunction *mf;
- const TargetMachine *tm;
- const TargetRegisterInfo *tri;
- const TargetInstrInfo *tii;
- const MachineLoopInfo *loopInfo;
- MachineRegisterInfo *mri;
- RenderMachineFunction *rmf;
-
- LiveIntervals *lis;
- LiveStacks *lss;
- VirtRegMap *vrm;
-
- LI2NodeMap li2Node;
- Node2LIMap node2LI;
- AllowedSetMap allowedSets;
- LiveIntervalSet vregIntervalsToAlloc,
- emptyVRegIntervals;
- NodeVector problemNodes;
-
-
- /// Builds a PBQP cost vector.
- template <typename RegContainer>
- PBQP::Vector buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &cealesces,
- PBQP::PBQPNum spillCost) const;
-
- /// \brief Builds a PBQP interference matrix.
- ///
- /// @return Either a pointer to a non-zero PBQP matrix representing the
- /// allocation option costs, or a null pointer for a zero matrix.
- ///
- /// Expects allowed sets for two interfering LiveIntervals. These allowed
- /// sets should contain only allocable registers from the LiveInterval's
- /// register class, with any interfering pre-colored registers removed.
- template <typename RegContainer>
- PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2) const;
-
- ///
- /// Expects allowed sets for two potentially coalescable LiveIntervals,
- /// and an estimated benefit due to coalescing. The allowed sets should
- /// contain only allocable registers from the LiveInterval's register
- /// classes, with any interfering pre-colored registers removed.
- template <typename RegContainer>
- PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2,
- PBQP::PBQPNum cBenefit) const;
-
- /// \brief Finds coalescing opportunities and returns them as a map.
- ///
- /// Any entries in the map are guaranteed coalescable, even if their
- /// corresponding live intervals overlap.
- CoalesceMap findCoalesces();
-
- /// \brief Finds the initial set of vreg intervals to allocate.
- void findVRegIntervalsToAlloc();
-
- /// \brief Constructs a PBQP problem representation of the register
- /// allocation problem for this function.
- ///
- /// @return a PBQP solver object for the register allocation problem.
- PBQP::Graph constructPBQPProblem();
-
- /// \brief Adds a stack interval if the given live interval has been
- /// spilled. Used to support stack slot coloring.
- void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
- /// \brief Given a solved PBQP problem maps this solution back to a register
- /// assignment.
- bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
-
- /// \brief Postprocessing before final spilling. Sets basic block "live in"
- /// variables.
- void finalizeAlloc() const;
-
- };
-
- char PBQPRegAlloc::ID = 0;
-}
+ std::auto_ptr<PBQPBuilder> builder;
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
-template <typename RegContainer>
-PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &coalesces,
- PBQP::PBQPNum spillCost) const {
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
- typedef typename RegContainer::const_iterator AllowedItr;
+ RegSet vregsToAlloc, emptyIntervalVRegs;
- // Allocate vector. Additional element (0th) used for spill option
- PBQP::Vector v(allowed.size() + 1, 0);
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
- v[0] = spillCost;
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+ void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
- // Iterate over the allowed registers inserting coalesce benefits if there
- // are any.
- unsigned ai = 0;
- for (AllowedItr itr = allowed.begin(), end = allowed.end();
- itr != end; ++itr, ++ai) {
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
- unsigned pReg = *itr;
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
- CoalesceMap::const_iterator cmItr =
- coalesces.find(RegPair(vReg, pReg));
+};
- // No coalesce - on to the next preg.
- if (cmItr == coalesces.end())
- continue;
+char RegAllocPBQP::ID = 0;
- // We have a coalesce - insert the benefit.
- v[ai + 1] = -cmItr->second;
- }
+} // End anonymous namespace.
- return v;
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
}
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
- const RegContainer &allowed1, const RegContainer &allowed2) const {
-
- typedef typename RegContainer::const_iterator RegContainerIterator;
-
- // Construct a PBQP matrix representing the cost of allocation options. The
- // rows and columns correspond to the allocation options for the two live
- // intervals. Elements will be infinite where corresponding registers alias,
- // since we cannot allocate aliasing registers to interfering live intervals.
- // All other elements (non-aliasing combinations) will have zero cost. Note
- // that the spill option (element 0,0) has zero cost, since we can allocate
- // both intervals to memory safely (the cost for each individual allocation
- // to memory is accounted for by the cost vectors for each live interval).
- PBQP::Matrix *m =
- new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
- // Assume this is a zero matrix until proven otherwise. Zero matrices occur
- // between interfering live ranges with non-overlapping register sets (e.g.
- // non-overlapping reg classes, or disjoint sets of allowed regs within the
- // same class). The term "overlapping" is used advisedly: sets which do not
- // intersect, but contain registers which alias, will have non-zero matrices.
- // We optimize zero matrices away to improve solver speed.
- bool isZeroMatrix = true;
-
-
- // Row index. Starts at 1, since the 0th row is for the spill option, which
- // is always zero.
- unsigned ri = 1;
-
- // Iterate over allowed sets, insert infinities where required.
- for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
- a1Itr != a1End; ++a1Itr) {
-
- // Column index, starts at 1 as for row index.
- unsigned ci = 1;
- unsigned reg1 = *a1Itr;
-
- for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
- a2Itr != a2End; ++a2Itr) {
-
- unsigned reg2 = *a2Itr;
-
- // If the row/column regs are identical or alias insert an infinity.
- if (tri->regsOverlap(reg1, reg2)) {
- (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
- isZeroMatrix = false;
- }
-
- ++ci;
- }
-
- ++ri;
- }
-
- // If this turns out to be a zero matrix...
- if (isZeroMatrix) {
- // free it and return null.
- delete m;
- return 0;
- }
-
- // ...otherwise return the cost matrix.
- return m;
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+
}
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
- const RegContainer &allowed1, const RegContainer &allowed2,
- PBQP::PBQPNum cBenefit) const {
-
- typedef typename RegContainer::const_iterator RegContainerIterator;
-
- // Construct a PBQP Matrix representing the benefits of coalescing. As with
- // interference matrices the rows and columns represent allowed registers
- // for the LiveIntervals which are (potentially) to be coalesced. The amount
- // -cBenefit will be placed in any element representing the same register
- // for both intervals.
- PBQP::Matrix *m =
- new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
- // Reset costs to zero.
- m->reset(0);
-
- // Assume the matrix is zero till proven otherwise. Zero matrices will be
- // optimized away as in the interference case.
- bool isZeroMatrix = true;
-
- // Row index. Starts at 1, since the 0th row is for the spill option, which
- // is always zero.
- unsigned ri = 1;
-
- // Iterate over the allowed sets, insert coalescing benefits where
- // appropriate.
- for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
- a1Itr != a1End; ++a1Itr) {
-
- // Column index, starts at 1 as for row index.
- unsigned ci = 1;
- unsigned reg1 = *a1Itr;
-
- for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
- a2Itr != a2End; ++a2Itr) {
-
- // If the row and column represent the same register insert a beneficial
- // cost to preference this allocation - it would allow us to eliminate a
- // move instruction.
- if (reg1 == *a2Itr) {
- (*m)[ri][ci] = -cBenefit;
- isZeroMatrix = false;
- }
-
- ++ci;
- }
-
- ++ri;
- }
-
- // If this turns out to be a zero matrix...
- if (isZeroMatrix) {
- // ...free it and return null.
- delete m;
- return 0;
- }
-
- return m;
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
}
-PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
-
- typedef MachineFunction::const_iterator MFIterator;
- typedef MachineBasicBlock::const_iterator MBBIterator;
- typedef LiveInterval::const_vni_iterator VNIIterator;
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
- CoalesceMap coalescesFound;
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
- // To find coalesces we need to iterate over the function looking for
- // copy instructions.
- for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
- const MachineBasicBlock *mbb = &*bbItr;
+ typedef std::vector<const LiveInterval*> LIVector;
- for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
- const MachineInstr *instr = &*iItr;
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
- // If this isn't a copy then continue to the next instruction.
- if (!instr->isCopy())
- continue;
-
- unsigned srcReg = instr->getOperand(1).getReg();
- unsigned dstReg = instr->getOperand(0).getReg();
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ pregs.insert(itr->first);
+ mri->setPhysRegUsed(itr->first);
+ }
+ }
- // If the registers are already the same our job is nice and easy.
- if (dstReg == srcReg)
- continue;
+ BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
+ aoEnd = trc->allocation_order_end(*mf);
+ aoItr != aoEnd; ++aoItr) {
+ unsigned preg = *aoItr;
+ if (!reservedRegs.test(preg)) {
+ vrAllowed.push_back(preg);
+ }
+ }
- bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
- dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+ // Remove any physical registers which overlap.
+ for (RegSet::const_iterator pregItr = pregs.begin(),
+ pregEnd = pregs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
+ const LiveInterval *pregLI = &lis->getInterval(preg);
- // If both registers are physical then we can't coalesce.
- if (srcRegIsPhysical && dstRegIsPhysical)
+ if (pregLI->empty()) {
continue;
+ }
- // If it's a copy that includes two virtual register but the source and
- // destination classes differ then we can't coalesce.
- if (!srcRegIsPhysical && !dstRegIsPhysical &&
- mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
+ if (!vregLI->overlaps(*pregLI)) {
continue;
-
- // If one is physical and one is virtual, check that the physical is
- // allocatable in the class of the virtual.
- if (srcRegIsPhysical && !dstRegIsPhysical) {
- const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
- if (std::find(dstRegClass->allocation_order_begin(*mf),
- dstRegClass->allocation_order_end(*mf), srcReg) ==
- dstRegClass->allocation_order_end(*mf))
- continue;
}
- if (!srcRegIsPhysical && dstRegIsPhysical) {
- const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
- if (std::find(srcRegClass->allocation_order_begin(*mf),
- srcRegClass->allocation_order_end(*mf), dstReg) ==
- srcRegClass->allocation_order_end(*mf))
- continue;
- }
-
- // If we've made it here we have a copy with compatible register classes.
- // We can probably coalesce, but we need to consider overlap.
- const LiveInterval *srcLI = &lis->getInterval(srcReg),
- *dstLI = &lis->getInterval(dstReg);
- if (srcLI->overlaps(*dstLI)) {
- // Even in the case of an overlap we might still be able to coalesce,
- // but we need to make sure that no definition of either range occurs
- // while the other range is live.
+ // Remove the register from the allowed set.
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), preg);
- // Otherwise start by assuming we're ok.
- bool badDef = false;
-
- // Test all defs of the source range.
- for (VNIIterator
- vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
- vniItr != vniEnd; ++vniItr) {
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
- // If we find a poorly defined def we err on the side of caution.
- if (!(*vniItr)->def.isValid()) {
- badDef = true;
- break;
- }
+ // Also remove any aliases.
+ const unsigned *aliasItr = tri->getAliasSet(preg);
+ if (aliasItr != 0) {
+ for (; *aliasItr != 0; ++aliasItr) {
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
- // If we find a def that kills the coalescing opportunity then
- // record it and break from the loop.
- if (dstLI->liveAt((*vniItr)->def)) {
- badDef = true;
- break;
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
}
}
+ }
+ }
- // If we have a bad def give up, continue to the next instruction.
- if (badDef)
- continue;
-
- // Otherwise test definitions of the destination range.
- for (VNIIterator
- vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
- vniItr != vniEnd; ++vniItr) {
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
- // We want to make sure we skip the copy instruction itself.
- if ((*vniItr)->getCopy() == instr)
- continue;
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
- if (!(*vniItr)->def.isValid()) {
- badDef = true;
- break;
- }
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
- if (srcLI->liveAt((*vniItr)->def)) {
- badDef = true;
- break;
- }
- }
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
- // As before a bad def we give up and continue to the next instr.
- if (badDef)
- continue;
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
}
-
- // If we make it to here then either the ranges didn't overlap, or they
- // did, but none of their definitions would prevent us from coalescing.
- // We're good to go with the coalesce.
-
- float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
-
- coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
- coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
}
-
}
- return coalescesFound;
+ return p;
}
-void PBQPRegAlloc::findVRegIntervalsToAlloc() {
-
- // Iterate over all live ranges.
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
-
- // Ignore physical ones.
- if (TargetRegisterInfo::isPhysicalRegister(itr->first))
- continue;
-
- LiveInterval *li = itr->second;
-
- // If this live interval is non-empty we will use pbqp to allocate it.
- // Empty intervals we allocate in a simple post-processing stage in
- // finalizeAlloc.
- if (!li->empty()) {
- vregIntervalsToAlloc.insert(li);
- }
- else {
- emptyVRegIntervals.insert(li);
- }
- }
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
}
-PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
-
- typedef std::vector<const LiveInterval*> LIVector;
- typedef std::vector<unsigned> RegVector;
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
- // This will store the physical intervals for easy reference.
- LIVector physIntervals;
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
- // Start by clearing the old node <-> live interval mappings & allowed sets
- li2Node.clear();
- node2LI.clear();
- allowedSets.clear();
-
- // Populate physIntervals, update preg use:
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
- if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
- physIntervals.push_back(itr->second);
- mri->setPhysRegUsed(itr->second->reg);
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
}
}
+}
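addInterferenceCosts leaves the spill row and column (index 0) at zero and writes an infinite cost wherever the two candidate physical registers overlap, so the solver can never select an overlapping pair. As a rough illustration, for allowed sets {EAX, EBX} and {EAX, ECX} (x86 names chosen only for the example) the 3x3 edge matrix would be:

    //            spill   EAX    ECX
    //   spill      0      0      0
    //   EAX        0     inf     0
    //   EBX        0      0      0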
- // Iterate over vreg intervals, construct live interval <-> node number
- // mappings.
- for (LiveIntervalSet::const_iterator
- itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
- itr != end; ++itr) {
- const LiveInterval *li = *itr;
-
- li2Node[li] = node2LI.size();
- node2LI.push_back(li);
- }
-
- // Get the set of potential coalesces.
- CoalesceMap coalesces;
-
- if (pbqpCoalescing) {
- coalesces = findCoalesces();
- }
-
- // Construct a PBQP solver for this problem
- PBQP::Graph problem;
- problemNodes.resize(vregIntervalsToAlloc.size());
-
- // Resize allowedSets container appropriately.
- allowedSets.resize(vregIntervalsToAlloc.size());
-
- BitVector ReservedRegs = tri->getReservedRegs(*mf);
-
- // Iterate over virtual register intervals to compute allowed sets...
- for (unsigned node = 0; node < node2LI.size(); ++node) {
-
- // Grab pointers to the interval and its register class.
- const LiveInterval *li = node2LI[node];
- const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
- // Start by assuming all allocable registers in the class are allowed...
- RegVector liAllowed;
- TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
- TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
- for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
- if (!ReservedRegs.test(*it))
- liAllowed.push_back(*it);
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
- // Eliminate the physical registers which overlap with this range, along
- // with all their aliases.
- for (LIVector::iterator pItr = physIntervals.begin(),
- pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
- if (!li->overlaps(**pItr))
- continue;
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the OK.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
- unsigned pReg = (*pItr)->reg;
-
- // If we get here then the live intervals overlap, but we're still ok
- // if they're coalescable.
- if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
- continue;
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
- // If we get here then we have a genuine exclusion.
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
- // Remove the overlapping reg...
- RegVector::iterator eraseItr =
- std::find(liAllowed.begin(), liAllowed.end(), pReg);
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
- if (eraseItr != liAllowed.end())
- liAllowed.erase(eraseItr);
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
- const unsigned *aliasItr = tri->getAliasSet(pReg);
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
- if (aliasItr != 0) {
- // ...and its aliases.
- for (; *aliasItr != 0; ++aliasItr) {
- RegVector::iterator eraseItr =
- std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+ if (cp.isPhys()) {
+ if (!lis->isAllocatable(dst)) {
+ continue;
+ }
- if (eraseItr != liAllowed.end()) {
- liAllowed.erase(eraseItr);
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
}
}
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
}
}
+ }
- // Copy the allowed set into a member vector for use when constructing cost
- // vectors & matrices, and mapping PBQP solutions back to assignments.
- allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+ return p;
+}
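The coalescing benefit above scales the per-copy spill weight by copyFactor, so copies in deeper loops receive a proportionally larger incentive. A rough sanity check, assuming getSpillWeight(false, true, d) evaluates to about 10^d as it did in LLVM of this vintage (treat that as an assumption):

    // loop depth 0:  cBenefit ~= 0.5 *   1 = 0.5
    // loop depth 2:  cBenefit ~= 0.5 * 100 = 50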
- // Set the spill cost to the interval weight, or epsilon if the
- // interval weight is zero
- PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
- li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
- // Build a cost vector for this interval.
- problemNodes[node] =
- problem.addNode(
- buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
- }
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
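Where addInterferenceCosts adds +infinity on overlapping pairs, addVirtRegCoalesce subtracts the benefit on the entries where both vregs choose the same physical register. For a coalescable copy between two non-interfering vregs whose allowed sets are both {R0, R1} (hypothetical register names), the edge matrix after this call is:

    //            spill   R0    R1
    //   spill      0     0     0
    //   R0         0    -b     0
    //   R1         0     0    -b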
- // Now add the cost matrices...
- for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
- const LiveInterval *li = node2LI[node1];
- // Test for live range overlaps and insert interference matrices.
- for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
- const LiveInterval *li2 = node2LI[node2];
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
+ au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
- CoalesceMap::const_iterator cmItr =
- coalesces.find(RegPair(li->reg, li2->reg));
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
- PBQP::Matrix *m = 0;
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
- if (cmItr != coalesces.end()) {
- m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
- cmItr->second);
- }
- else if (li->overlaps(*li2)) {
- m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
- }
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
- if (m != 0) {
- problem.addEdge(problemNodes[node1],
- problemNodes[node2],
- *m);
+ LiveInterval *li = itr->second;
- delete m;
- }
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
}
}
-
- assert(problem.getNumNodes() == allowedSets.size());
-/*
- std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
- << problem.getNumEdges() << " edges.\n";
-
- problem.printDot(std::cerr);
-*/
- // We're done, PBQP problem constructed - return it.
- return problem;
}
-void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
MachineRegisterInfo* mri) {
int stackSlot = vrm->getStackSlot(spilled->reg);
- if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
return;
+ }
const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
VNInfo *vni;
- if (stackInterval.getNumValNums() != 0)
+ if (stackInterval.getNumValNums() != 0) {
vni = stackInterval.getValNumInfo(0);
- else
+ } else {
vni = stackInterval.getNextValue(
- SlotIndex(), 0, false, lss->getVNInfoAllocator());
+ SlotIndex(), 0, lss->getVNInfoAllocator());
+ }
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
}
-bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
-
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
// Set to true if we have any spills
bool anotherRoundNeeded = false;
// Clear the existing allocation.
vrm->clearAllVirt();
- // Iterate over the nodes mapping the PBQP solution to a register assignment.
- for (unsigned node = 0; node < node2LI.size(); ++node) {
- unsigned virtReg = node2LI[node]->reg,
- allocSelection = solution.getSelection(problemNodes[node]);
-
-
- // If the PBQP solution is non-zero it's a physical register...
- if (allocSelection != 0) {
- // Get the physical reg, subtracting 1 to account for the spill option.
- unsigned physReg = allowedSets[node][allocSelection - 1];
-
- DEBUG(dbgs() << "VREG " << virtReg << " -> "
- << tri->getName(physReg) << "\n");
-
- assert(physReg != 0);
-
- // Add to the virt reg map and update the used phys regs.
- vrm->assignVirt2Phys(virtReg, physReg);
- }
- // ...Otherwise it's a spill.
- else {
-
- // Make sure we ignore this virtual reg on the next round
- // of allocation
- vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
-
- // Insert spill ranges for this live range
- const LiveInterval *spillInterval = node2LI[node];
- double oldSpillWeight = spillInterval->weight;
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ const LiveInterval* spillInterval = &lis->getInterval(vreg);
+ double oldWeight = spillInterval->weight;
SmallVector<LiveInterval*, 8> spillIs;
rmf->rememberUseDefs(spillInterval);
std::vector<LiveInterval*> newSpills =
@@ -768,42 +541,42 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
addStackInterval(spillInterval, mri);
rmf->rememberSpills(spillInterval, newSpills);
- (void) oldSpillWeight;
- DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
- << oldSpillWeight << ", New vregs: ");
+ (void) oldWeight;
+ DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ << oldWeight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
for (std::vector<LiveInterval*>::const_iterator
itr = newSpills.begin(), end = newSpills.end();
itr != end; ++itr) {
-
assert(!(*itr)->empty() && "Empty spill range.");
-
DEBUG(dbgs() << (*itr)->reg << " ");
-
- vregIntervalsToAlloc.insert(*itr);
+ vregsToAlloc.insert((*itr)->reg);
}
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
anotherRoundNeeded |= !newSpills.empty();
+ } else {
+ assert(false && "Unknown allocation option.");
}
}
return !anotherRoundNeeded;
}
-void PBQPRegAlloc::finalizeAlloc() const {
+
+void RegAllocPBQP::finalizeAlloc() const {
typedef LiveIntervals::iterator LIIterator;
typedef LiveInterval::Ranges::const_iterator LRIterator;
// First allocate registers for the empty intervals.
- for (LiveIntervalSet::const_iterator
- itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
itr != end; ++itr) {
- LiveInterval *li = *itr;
+ LiveInterval *li = &lis->getInterval(*itr);
unsigned physReg = vrm->getRegAllocPref(li->reg);
@@ -828,11 +601,9 @@ void PBQPRegAlloc::finalizeAlloc() const {
// Get the physical register for this interval
if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
reg = li->reg;
- }
- else if (vrm->isAssignedReg(li->reg)) {
+ } else if (vrm->isAssignedReg(li->reg)) {
reg = vrm->getPhys(li->reg);
- }
- else {
+ } else {
// Ranges which are assigned a stack slot only are ignored.
continue;
}
@@ -849,7 +620,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
// Find the set of basic blocks which this range is live into...
if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
// And add the physreg for this interval to their live-in sets.
- for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+ for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
if (liveInMBBs[i] != entryMBB) {
if (!liveInMBBs[i]->isLiveIn(reg)) {
liveInMBBs[i]->addLiveIn(reg);
@@ -863,7 +634,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
}
-bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
mf = &MF;
tm = &mf->getTarget();
@@ -894,7 +665,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
findVRegIntervalsToAlloc();
// If there are non-empty intervals allocate them using pbqp.
- if (!vregIntervalsToAlloc.empty()) {
+ if (!vregsToAlloc.empty()) {
bool pbqpAllocComplete = false;
unsigned round = 0;
@@ -902,11 +673,13 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
while (!pbqpAllocComplete) {
DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
- PBQP::Graph problem = constructPBQPProblem();
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
PBQP::Solution solution =
- PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem);
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
- pbqpAllocComplete = mapPBQPToRegAlloc(solution);
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
++round;
}
@@ -917,12 +690,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
rmf->renderMachineFunction("After PBQP register allocation.", vrm);
- vregIntervalsToAlloc.clear();
- emptyVRegIntervals.clear();
- li2Node.clear();
- node2LI.clear();
- allowedSets.clear();
- problemNodes.clear();
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
@@ -934,9 +703,18 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-FunctionPass* llvm::createPBQPRegisterAllocator() {
- return new PBQPRegAlloc();
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder) {
+ return new RegAllocPBQP(builder);
}
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
#undef DEBUG_TYPE
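A sketch of how a client might plug a custom problem builder into the new factory, assuming PBQPBuilder::build is virtual as the overriding class above suggests (the subclass and function names are hypothetical, and the usual RegAllocPBQP headers are assumed to be included):

    class MyPBQPBuilder : public PBQPBuilderWithCoalescing {
      // override build() here to add extra costs to the returned problem
    };

    FunctionPass *makeMyPBQPAllocator() {
      return llvm::createPBQPRegisterAllocator(
          std::auto_ptr<PBQPBuilder>(new MyPBQPBuilder()));
    }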
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 02b5539..407559a 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -24,7 +24,8 @@
using namespace llvm;
// Register the RegisterCoalescer interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer",
+ SimpleRegisterCoalescing)
char RegisterCoalescer::ID = 0;
// RegisterCoalescer destructor: DO NOT move this to the header file
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
index 93426ee..cbfd5a2 100644
--- a/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.cpp
@@ -30,9 +30,14 @@
using namespace llvm;
char RenderMachineFunction::ID = 0;
-INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
"Render machine functions (and related info) to HTML pages",
- false, false);
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
static cl::opt<std::string>
outputFileSuffix("rmf-file-suffix",
@@ -458,14 +463,9 @@ namespace llvm {
liItr != liEnd; ++liItr) {
LiveInterval *li = liItr->second;
- const TargetRegisterClass *liTRC;
-
if (TargetRegisterInfo::isPhysicalRegister(li->reg))
continue;
- liTRC = mri->getRegClass(li->reg);
-
-
// For all ranges in the current interal.
for (LiveInterval::iterator lrItr = li->begin(),
lrEnd = li->end();
diff --git a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
index 8d56a82..8571992 100644
--- a/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
+++ b/contrib/llvm/lib/CodeGen/RenderMachineFunction.h
@@ -202,7 +202,9 @@ namespace llvm {
public:
static char ID;
- RenderMachineFunction() : MachineFunctionPass(ID) {}
+ RenderMachineFunction() : MachineFunctionPass(ID) {
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 7d39dc4..3388889 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
+/// getInstrDesc helper to handle SDNodes.
+const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
/// dump - dump the schedule.
void ScheduleDAG::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -68,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-void SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D) {
// If this node already has this depenence, don't add a redundant one.
for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D)
- return;
+ return false;
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
@@ -99,6 +106,7 @@ void SUnit::addPred(const SDep &D) {
this->setDepthDirty();
N->setHeightDirty();
}
+ return true;
}
/// removePred - This removes the specified edge as a pred of the current
@@ -278,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
dbgs() << " Height : " << Height << "\n";
@@ -492,7 +501,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
/// all nodes affected by the edge insertion. These nodes will later get new
/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
- bool& HasLoop) {
+ bool &HasLoop) {
std::vector<const SUnit*> WorkList;
WorkList.reserve(SUnits.size());
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
index 0a2fb37..6b7a8c64 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -57,7 +57,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
assert(I->getReg() && "Unknown physical register!");
unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
.addReg(I->getReg());
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index ea93dd5..f17023e 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -16,6 +16,7 @@
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,9 +33,9 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
- Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
- MFI = mf.getFrameInfo();
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+ InstrItins(mf.getTarget().getInstrItineraryData()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
DbgValueVec.clear();
}
@@ -78,12 +79,12 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
// First just call Value::getUnderlyingObject to let it do what it does.
do {
- V = V->getUnderlyingObject();
+ V = GetUnderlyingObject(V);
// If it found an inttoptr, use special code to continue climing.
if (Operator::getOpcode(V) != Instruction::IntToPtr)
break;
@@ -141,6 +142,46 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
}
}
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// is too high to be hidden by the branch or when the liveout registers are
+/// used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+ MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ Uses[Reg].push_back(&ExitSU);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ SmallSet<unsigned, 8> Seen;
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (Seen.insert(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ }
+ }
+}
+
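The effect of AddSchedBarrierDeps can be pictured with a fallthrough block whose successor has %r0 live-in (register name invented for the example):

    //   %r0 = LOAD ...      ; long-latency def of a live-out register
    //   <fallthrough to a block with %r0 live-in>
    //
    // Because %r0 now appears in Uses[] attached to ExitSU, the LOAD gets a
    // data edge to the exit node, so the scheduler tries to leave room for
    // its latency instead of placing it immediately before the block exit.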
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We'll be allocating one SUnit for each instruction, plus one for
// the region exit node.
@@ -175,6 +216,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// without emitting the info from the previous call.
DbgValueVec.clear();
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ AddSchedBarrierDeps();
+
// Walk the list of instructions, from bottom moving up.
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
MII != MIE; --MII) {
@@ -194,6 +239,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
+ SU->isCall = TID.isCall();
+ SU->isCommutable = TID.isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -228,6 +275,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
@@ -237,6 +286,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::vector<SUnit *> &DefList = Defs[*Alias];
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias)))
@@ -258,12 +309,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// TODO: Perhaps we should get rid of
// SpecialAddressLatency and just move this into
// adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
MachineInstr *UseMI = UseSU->getInstr();
const TargetInstrDesc &UseTID = UseMI->getDesc();
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
- if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ if (RegUseIndex >= 0 &&
+ (UseTID.mayLoad() || UseTID.mayStore()) &&
(unsigned)RegUseIndex < UseTID.getNumOperands() &&
UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
@@ -357,7 +410,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
(!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
@@ -446,6 +499,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Treat all other stores conservatively.
goto new_alias_chain;
}
+
+ if (!ExitSU.isPred(SU))
+ // Push stores up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Order, 0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
} else if (TID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
@@ -498,23 +559,22 @@ void ScheduleDAGInstrs::FinishBlock() {
}
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-
// Compute the latency for the node.
- SU->Latency =
- InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
- // Simplistic target-independent heuristic: assume that loads take
- // extra time.
- if (InstrItins.isEmpty())
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
if (SU->getInstr()->getDesc().mayLoad())
SU->Latency += 2;
+ } else {
+ SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+ }
}
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
+ if (!InstrItins || InstrItins->isEmpty())
return;
// For a data dependency with a known register...
@@ -528,14 +588,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
- DefIdx);
- if (DefCycle >= 0) {
- MachineInstr *UseMI = Use->getInstr();
- const unsigned UseClass = UseMI->getDesc().getSchedClass();
-
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ }
+ MachineInstr *UseMI = Use->getInstr();
+ // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ if (UseMI) {
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = UseMI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -544,15 +611,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
if (MOReg != Reg)
continue;
- int UseCycle = InstrItins.getOperandCycle(UseClass, i);
- if (UseCycle >= 0)
- Latency = std::max(Latency, DefCycle - UseCycle + 1);
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+ UseMI, i);
+ Latency = std::max(Latency, UseCycle);
}
-
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
+ } else {
+ // UseMI is null, then it must be a scheduling barrier.
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
}
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
index c8f543f..c878287 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.h
@@ -101,6 +101,7 @@ namespace llvm {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
/// Defs, Uses - Remember where defs and uses of each physical register
/// are as we iterate upward through the instructions. This is allocated
@@ -163,6 +164,15 @@ namespace llvm {
/// input.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+ /// list of instructions being scheduled to the scheduling barrier. We want to
+ /// make sure instructions which define registers that are either used by
+ /// the terminator or are live-out are properly scheduled. This is
+ /// especially important when the definition latency of the return value(s)
+ /// is too high to be hidden by the branch or when the liveout registers are
+ /// used by instructions in the fallthrough block.
+ void AddSchedBarrierDeps();
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index cbde2b0..e6d7ded 100644
--- a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===//
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,56 +7,81 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements a hazard recognizer using the instructions itineraries
-// defined for the current target.
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "post-RA-sched"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetInstrItineraries.h"
using namespace llvm;
-PostRAHazardRecognizer::
-PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
- ScheduleHazardRecognizer(), ItinData(LItinData) {
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
// Determine the maximum depth of any itinerary. This determines the
// depth of the scoreboard. We always make the scoreboard at least 1
// cycle deep to avoid dealing with the boundary condition.
unsigned ScoreboardDepth = 1;
- if (!ItinData.isEmpty()) {
+ if (ItinData && !ItinData->isEmpty()) {
+ IssueWidth = ItinData->IssueWidth;
+
for (unsigned idx = 0; ; ++idx) {
- if (ItinData.isEndMarker(idx))
+ if (ItinData->isEndMarker(idx))
break;
- const InstrStage *IS = ItinData.beginStage(idx);
- const InstrStage *E = ItinData.endStage(idx);
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
unsigned ItinDepth = 0;
- for (; IS != E; ++IS)
- ItinDepth += IS->getCycles();
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
- ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ }
}
+ MaxLookAhead = ScoreboardDepth;
}
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = "
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
<< ScoreboardDepth << '\n');
}
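The rewritten depth computation accounts for stages that overlap via getNextCycles and then rounds the scoreboard depth up to a power of two. A worked example with invented itinerary numbers: stages with cycles {2, 3, 1} and next-cycle gaps {1, 1} give ItinDepth = max(0+2, 1+3, 2+1) = 4, and the enclosing loop grows ScoreboardDepth 1 -> 2 -> 4, so both scoreboards end up four cycles deep.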
-void PostRAHazardRecognizer::Reset() {
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
RequiredScoreboard.reset();
ReservedScoreboard.reset();
}
-void PostRAHazardRecognizer::ScoreBoard::dump() const {
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << "Scoreboard:\n";
unsigned last = Depth - 1;
@@ -72,24 +97,46 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const {
}
}
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
ScheduleHazardRecognizer::HazardType
-PostRAHazardRecognizer::getHazardType(SUnit *SU) {
- if (ItinData.isEmpty())
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
return NoHazard;
- unsigned cycle = 0;
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
// Use the itinerary for the underlying instruction to check for
// free FU's in the scoreboard at the appropriate future cycles.
- unsigned idx = SU->getInstr()->getDesc().getSchedClass();
- for (const InstrStage *IS = ItinData.beginStage(idx),
- *E = ItinData.endStage(idx); IS != E; ++IS) {
+
+ const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+ if (TID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = TID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
// We must find one of the stage's units free for every cycle the
// stage is occupied. FIXME it would be more accurate to find the
// same unit free in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
- assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
- "Scoreboard depth exceeded!");
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
@@ -97,18 +144,18 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
- freeUnits &= ~ReservedScoreboard[cycle + i];
+ freeUnits &= ~ReservedScoreboard[StageCycle];
// FALLTHROUGH
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
- freeUnits &= ~RequiredScoreboard[cycle + i];
+ freeUnits &= ~RequiredScoreboard[StageCycle];
break;
}
if (!freeUnits) {
DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- DEBUG(SU->getInstr()->dump());
+ DEBUG(DAG->dumpNode(SU));
return Hazard;
}
}
@@ -120,17 +167,24 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
return NoHazard;
}
-void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
- if (ItinData.isEmpty())
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
return;
- unsigned cycle = 0;
-
// Use the itinerary for the underlying instruction to reserve FU's
// in the scoreboard at the appropriate future cycles.
- unsigned idx = SU->getInstr()->getDesc().getSchedClass();
- for (const InstrStage *IS = ItinData.beginStage(idx),
- *E = ItinData.endStage(idx); IS != E; ++IS) {
+ const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+ assert(TID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(TID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = TID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
// We must reserve one of the stage's units for every cycle the
// stage is occupied. FIXME it would be more accurate to reserve
// the same unit free in all the cycles.
@@ -174,7 +228,16 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
DEBUG(RequiredScoreboard.dump());
}
-void PostRAHazardRecognizer::AdvanceCycle() {
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c9c4d91..9035602 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
@@ -185,7 +185,7 @@ namespace {
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
- SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -229,12 +229,13 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -248,16 +249,19 @@ namespace {
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const;
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
const Value *&SrcValue, int &SrcValueOffset,
- unsigned &SrcValueAlignment) const;
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -270,15 +274,15 @@ namespace {
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
-
+
SelectionDAG &getDAG() const { return DAG; }
-
+
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
EVT getShiftAmountTy() {
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
}
-
+
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
@@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
+ DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
@@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, PVT, dl,
+ return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
- }
+ }
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
@@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
@@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
+ DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
@@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
- case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
@@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
@@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
return SDValue();
}
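A quick check of the new add-of-sign-extended-i1 fold, per possible i1 value b:

    //  b = 0:  add(sext 0, X) = X            sub(X, zext 0) = X
    //  b = 1:  add(sext 1, X) = X + (-1)     sub(X, zext 1) = X - 1
    //
    // Both forms agree for every i1 input, so rewriting the add as a sub is safe.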
@@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
if (N->hasNUsesOfValue(0, 1))
return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
@@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
@@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
(LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
}
return SDValue();
@@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check whether we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return DAG.getConstant(0, N->getValueType(0));
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
@@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
@@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
@@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
@@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the smul_lohi to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
@@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the umul_lohi to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
@@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
N0.getValueType(), N0Op0);
@@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ExtVT == LoadedVT &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
+
// Do not change the width of a volatile load.
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
@@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
AddToWorkList(NewPtr.getNode());
-
+
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
Alignment);
AddToWorkList(N);
@@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), VT));
}
// fold (xor x, x) -> 0
- if (N0 == N1) {
- if (!VT.isVector()) {
- return DAG.getConstant(0, VT);
- } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
- // Produce a vector of zeros.
- SDValue El = DAG.getConstant(0, VT.getVectorElementType());
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
- }
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
@@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
LHS->getOperand(1), N->getOperand(1));
// Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
VT, LHS->getOperand(0), N->getOperand(1));
// Create the new binop.
@@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
DAG.getNode(ISD::TRUNCATE,
@@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
// (srl (and x, (shl -1, c1)), (sub c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL &&
@@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+  // fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy());
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
-
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+      // This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
// fold (srl (shl x, c), c) -> (and x, cst2)
if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
N0.getValueSizeInBits() <= 64) {
@@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(~0ULL >> ShAmt, VT));
}
-
+
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
@@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
- // good a reason for the transformation.
+ // a good reason for the transformation.
return ExtendNodes.size();
}
return true;
@@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
- N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
- if (ShAmt > KnownZeroBits)
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
return SDValue();
}
- DebugLoc dl = N->getDebugLoc();
- return DAG.getNode(N0.getOpcode(), dl, VT,
- DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
- DAG.getNode(ISD::ZERO_EXTEND, dl,
- N0.getOperand(1).getValueType(),
- N0.getOperand(1)));
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
}
return SDValue();
@@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
- N->getDebugLoc(),
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
- return SDValue();
} else if (Opc == ISD::SRL) {
- // Annother special-case: SRL is basically zero-extending a narrower
- // value.
+ // Another special-case: SRL is basically zero-extending a narrower value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of size of VT?
@@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
}
}
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
- cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (TLI.isBigEndian()) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
-
- uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
-
- SDValue Load = (ExtType == ISD::NON_EXTLOAD)
- ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
-
- // Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+  // If we haven't found a load, we can't narrow it. Don't transform one with
+  // multiple uses; this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
- // Return the new loaded value.
- return Load;
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy();
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
- return SDValue();
+ // Return the new loaded value.
+ return Result;
}
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
@@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
@@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
- LD1->getBasePtr(), LD1->getSrcValue(),
- LD1->getSrcValueOffset(), false, false, Align);
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
}
return SDValue();
}
-SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
}
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
@@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
}
// (conv (conv x, t1), t2) -> (conv x, t2)
- if (N0.getOpcode() == ISD::BIT_CONVERT)
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
@@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
- LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
- DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
N0.getValueType(), Load),
Load.getValue(1));
return Load;
@@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
// This often reduces constant pool loads.
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
- SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
- SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
@@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
X, DAG.getConstant(SignBit, VT));
AddToWorkList(X.getNode());
- SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
Cst, DAG.getConstant(~SignBit, VT));
@@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
@@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Due to the FP element handling below calling this routine recursively,
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
- DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, BV->getOperand(0)));
-
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
@@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
- BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
@@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
- SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
// Okay, we know the src/dst types are both integers of differing types.
@@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
- NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
@@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
continue;
}
- APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
- getAPIntValue()).zextOrTrunc(SrcBitSize);
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
Ops[0]);
@@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ if (N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
VT, Int);
}
}
@@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Int);
}
}
@@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- SDNode *Trunc = 0;
- if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
- // Look past truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
- if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
// %a = ...
@@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
@@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
}
}
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
}
-
+
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
@@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
-
- EVT SetCCVT = NodeToReplace.getValueType();
+ EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SetCCVT = TLI.getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
- removeFromWorkList(NodeToReplace.getNode());
- DAG.DeleteNode(NodeToReplace.getNode());
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
MVT::Other, Chain, SetCC, N2);
}
@@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- N->getDebugLoc(),
- Chain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
@@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Replace the chain to void dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
- BetterChain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- LD->getDebugLoc(),
- BetterChain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(),
LD->isNonTemporal(),
@@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
-
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
-
+
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
-
+
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
-
+
// The store should be chained directly to the load or be an operand of a
// tokenfactor.
if (LD == Chain.getNode())
@@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
}
if (!isOk) return Result;
}
-
+
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
-
+
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
return Result;
@@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
-
+
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
- case 1:
- case 2:
+ case 1:
+ case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
-
+
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
-
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
@@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
-
+
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
-
+
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
return 0;
-
+
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift)
@@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
-
+
SDValue Ptr = St->getBasePtr();
if (StOffset) {
Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
-
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
-
+
++OpsNarrowed;
- return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
- St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
false, false, NewAlign).getNode();
}
@@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
unsigned Opc = Value.getOpcode();
-
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return SDValue(NewST, 0);
-
+
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
@@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Value.getOperand(0), ST,this))
return SDValue(NewST, 0);
}
-
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
- if (LD->getBasePtr() != Ptr)
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow it the load / op / store to.
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
DAG.getConstant(PtrOff, Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
@@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
@@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign);
}
@@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
break;
@@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
@@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(),
+ Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
- SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
- Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
@@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
+  // Try transforming a pair of floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
return SDValue(N, 0);
}
@@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVal.getOpcode() == ISD::UNDEF)
return InVec;
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
// vector with the inserted element.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
- // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
// BUILD_VECTOR with undef elements and the inserted element.
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ if (InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
- EVT VT = InVec.getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
@@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
return SDValue();
}
@@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(1);
if (isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
- if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ if (InVec.getOpcode() == ISD::BITCAST) {
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ if (InVec.getOpcode() == ISD::BITCAST)
InVec = InVec.getOperand(0);
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
@@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
return SDValue();
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
unsigned Align = LN0->getAlignment();
if (NewLoad) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
@@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
if (Elt) {
- unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
@@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
-
+
Mask.push_back(ExtIndex);
continue;
}
@@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
- // If it is a splat, check if the argument vector is a build_vector with
- // all scalar elements the same.
- if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
- if (V->getOpcode() == ISD::BIT_CONVERT) {
+ if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElems = V->getNumOperands();
- unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
- if (NumElems > BaseIdx) {
- SDValue Base;
- bool AllSame = true;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
- Base = V->getOperand(i);
- break;
- }
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
}
- // Splat of <u, u, u, u>, return <u, u, u, u>
- if (!Base.getNode())
- return N0;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i) != Base) {
- AllSame = false;
- break;
- }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
}
- // Splat of <x, x, x, x>, return <x, x, x, x>
- if (AllSame)
- return N0;
}
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
}
}
return SDValue();
@@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<int, 8> Indices;
@@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
}
@@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- EVT VT = N->getValueType(0);
- assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
- EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
break;
}
- // If the vector element type is not legal, the BUILD_VECTOR operands
- // are promoted and implicitly truncated. Make that explicit here.
- if (LHSOp.getValueType() != EltType)
- LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
- if (RHSOp.getValueType() != EltType)
- RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
- SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::Constant &&
@@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
AddToWorkList(FoldOp.getNode());
}
- if (Ops.size() == LHS.getNumOperands()) {
- EVT VT = LHS.getValueType();
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
@@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
// If this is a select from two identical things, try to pull the operation
// through the select.
- if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
- // If this is a load and the token chain is identical, replace the select
- // of two loads with a load through a select of the address to load from.
- // This triggers in things like "select bool X, 10.0, 123.0" after the FP
- // constants have been dropped into the constant pool.
- if (LHS.getOpcode() == ISD::LOAD &&
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- !cast<LoadSDNode>(LHS)->isVolatile() &&
- !cast<LoadSDNode>(RHS)->isVolatile() &&
- // Token chains must be identical.
- LHS.getOperand(0) == RHS.getOperand(0)) {
- LoadSDNode *LLD = cast<LoadSDNode>(LHS);
- LoadSDNode *RLD = cast<LoadSDNode>(RHS);
-
- // If this is an EXTLOAD, the VT's must match.
- if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
- unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
- if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
- LLDAddrSpace = PT->getAddressSpace();
- }
- if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
- RLDAddrSpace = PT->getAddressSpace();
- }
- SDValue Addr;
- if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
- (!RLD->hasAnyUseOfValue(1) ||
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
- (!RLD->hasAnyUseOfValue(1) ||
- (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
- }
- }
- }
-
- if (Addr.getNode()) {
- SDValue Load;
- if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
- Load = DAG.getLoad(TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(),
- Addr, 0, 0,
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- } else {
- Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(), Addr, 0, 0,
- LLD->getMemoryVT(),
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- }
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
- // Users of the select now use the result of the load.
- CombineTo(TheSelect, Load);
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+        (RLD->hasAnyUseOfValue(1) &&
+         (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
- // Users of the old loads now use the new load's chain. We know the
- // old-load value is dead now.
- CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
- CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
- return true;
- }
- }
- }
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
}
return false;
@@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
ISD::CondCode CC, bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
-
+
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
-
+
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
};
const Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
-
+
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
@@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
SDValue Zero = DAG.getIntPtrConstant(0);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize);
-
+
SDValue Cond = DAG.getSetCC(DL,
TLI.getSetCCResultType(N0.getValueType()),
N0, N1, CC);
@@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
CstOffset);
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false,
+ MachinePointerInfo::getConstantPool(), false,
false, Alignment);
}
- }
+ }
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
@@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
}
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // In plain terms, we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
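+  // Illustrative example (values assumed for exposition): with VT = i32 and
+  // y = 0x10, AndMask has 27 leading zeros, so the SHL by 27 moves the tested
+  // bit into the sign bit and the SRA by 31 smears it, yielding all-ones when
+  // the bit was set and zero otherwise; the final AND with A then produces
+  // A or 0, matching the select_cc semantics.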
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
@@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
}
/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as results.
+// to alias with anything but itself. Provides base object and offset as
+// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
@@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
Offset += C->getZExtValue();
}
}
-
+
// Return the underlying GlobalValue, and update the Offset. Return false
// for GlobalAddressSDNode since the same GlobalAddress may be represented
// by multiple nodes with different offsets.
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const {
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
@@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know what the bases are, and they aren't identical, then we know they
- // cannot alias.
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
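+  // Worked example (numbers assumed for exposition): if the resolved object
+  // offsets come out to 0 and 4 with Size1 = Size2 = 8, then neither
+  // (0 + 8) <= 4 nor (4 + 8) <= 0 holds, so the ranges overlap and an alias
+  // is reported.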
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
return false;
@@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-
+
// There is no overlap between these relatively aligned accesses of similar
// size, return no alias.
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
return false;
}
-
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
+ const Value *&SrcValue,
int &SrcValueOffset,
- unsigned &SrcValueAlign) const {
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
return true;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
@@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
SrcValueAlign = ST->getOriginalAlignment();
+ TBAAInfo = ST->getTBAAInfo();
} else {
llvm_unreachable("FindAliasInfo expected a memory operand");
}
@@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign);
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
-
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
-
- // For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
// find more and revert to original chain since the xform is unlikely to be
// profitable.
- //
- // FIXME: The depth check could be made to return the last non-aliasing
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > 6 || Aliases.size() == 2) {
@@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign);
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign)) {
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
@@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// If a single operand then chain to it. We don't need to revisit it.
return Aliases[0];
}
-
+
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
&Aliases[0], Aliases.size());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index a4eed71..490b857 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
/// startNewBlock - Set the current block to which generated machine
@@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
-
+
// If target-independent code couldn't handle the value, give target-specific
// code a try.
if (!Reg && isa<Constant>(V))
Reg = TargetMaterializeConstant(cast<Constant>(V));
-
+
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
if (Reg != 0) {
@@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
LocalValueMap[I] = Reg;
return Reg;
}
-
+
unsigned &AssignedReg = FuncInfo.ValueMap[I];
if (AssignedReg == 0)
// Use the new register.
@@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
@@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
NIsKill = true;
continue;
}
-
+
// N = N + Idx * ElementSize;
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
@@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {
return true;
const Value *Address = DI->getAddress();
- if (!Address)
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
return true;
- if (isa<UndefValue>(Address))
- return true;
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- // Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
- // Building the map above is target independent. Generating DBG_VALUE
- // inline is target dependent; do this now.
- (void)TargetSelectInstruction(cast<Instruction>(I));
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
return true;
}
case Intrinsic::dbg_value: {
@@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- // Insert an undef so we can see what we dropped.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
- .addReg(0U).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
return true;
}
case Intrinsic::eh_exception: {
@@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {
bool FastISel::SelectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
// Check if the destination type is legal. Or as a special case,
// it may be i1 if we're doing a truncate because that's
// easy and somewhat common.
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
InputReg, InputRegIsKill);
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {
return true;
}
- // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple() ||
!TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
unsigned Op0 = getRegForValue(I->getOperand(0));
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
-
+
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
@@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {
ResultReg).addReg(Op0);
}
}
-
- // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BIT_CONVERT, Op0, Op0IsKill);
-
+ ISD::BITCAST, Op0, Op0IsKill);
+
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BIT_CONVERT, OpReg, OpRegIsKill);
+ ISD::BITCAST, OpReg, OpRegIsKill);
if (IntReg == 0)
return false;
@@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
@@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
// Dynamic-sized alloca is not handled yet.
return false;
-
+
case Instruction::Call:
return SelectCall(I);
-
+
case Instruction::BitCast:
return SelectBitCast(I);
@@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT, MVT,
+unsigned FastISel::FastEmit_rr(MVT, MVT,
unsigned,
unsigned /*Op0*/, bool /*Op0IsKill*/,
unsigned /*Op1*/, bool /*Op1IsKill*/) {
@@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
else {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5ef6404..98582ba 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 61c2a90..e309def 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -31,11 +31,11 @@
using namespace llvm;
/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
/// not go into the resulting MachineInstr).
unsigned InstrEmitter::CountResults(SDNode *Node) {
unsigned N = Node->getNumValues();
- while (N && Node->getValueType(N - 1) == MVT::Flag)
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
--N;
if (N && Node->getValueType(N - 1) == MVT::Other)
--N; // Skip over chain result.
@@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
}
/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
unsigned InstrEmitter::CountOperands(SDNode *Node) {
unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
@@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
return;
}
@@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
EVT VT = Node->getValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Flag)
+ if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
if (User->isMachineOpcode()) {
@@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
}
@@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
@@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
BA->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
@@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
- const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
- assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
-
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
- VRBase = MRI->createVirtualRegister(SRC);
- }
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase);
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
- // Add source, and subreg index
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
- IsClone, IsCloned);
- assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
- "Cannot yet extract from physregs");
- MI->getOperand(1).setSubReg(SubIdx);
- MBB->insert(InsertPos, MI);
+ // Add source, and subreg index
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
+ MBB->insert(InsertPos, MI);
+ }
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
SDValue N0 = Node->getOperand(0);
@@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (!SRC)
- llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
- if (SRC != RC) {
+ if (SRC && SRC != RC) {
MRI->setRegClass(NewVReg, SRC);
RC = SRC;
}
@@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MBB->insert(InsertPos, MI);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// The MachineInstr constructor adds implicit-def operands. Scan through
// these to determine which are dead.
if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
// First, collect all used registers.
SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
if (F->getOpcode() == ISD::CopyFromReg)
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
else {
@@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
unsigned Reg = R->getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
UsedRegs.push_back(Reg);
}
}
@@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
- if (II.usesCustomInsertionHook()) {
- // Insert this instruction into the basic block using a target
- // specific inserter which may returns a new basic block.
- bool AtEnd = InsertPos == MBB->end();
- MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- if (NewMBB != MBB) {
- if (AtEnd)
- InsertPos = NewMBB->end();
- MBB = NewMBB;
- }
- return;
- }
-
- // Additional results must be an physical register def.
+ // Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
// If there are no uses, mark the register as dead now, so that
// MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Flag value, for the benefit of targets still using
- // Flag for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
MI->addRegisterDead(Reg, TRI);
}
}
// If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any flag outputs, we don't do this
- // because we don't know what implicit defs are being used by flagged nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
if (const unsigned *IDList = II.getImplicitDefs()) {
for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
i != e; ++i)
@@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
- // Add the isAlignStack bit.
- int64_t isAlignStack =
- cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
- MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2981cd3..49c862c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,11 +66,6 @@ class SelectionDAGLegalize {
/// against each other, including inserted libcalls.
SDValue LastCALLSEQ_END;
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
enum LegalizeAction {
Legal, // The target natively supports this operation.
Promote, // This operation should be executed in a larger type.
@@ -91,6 +87,9 @@ class SelectionDAGLegalize {
// If someone requests legalization of the new node, return itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
}
public:
@@ -172,6 +171,7 @@ private:
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
void SelectionDAGLegalize::LegalizeDAG() {
LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_END)
- return Node;
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+ // Nested CALLSEQ_START/END constructs aren't yet legal,
+ // but we can DTRT and handle them correctly here.
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ depth++;
+ else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ depth--;
+ if (depth == 0)
+ return Node;
+ }
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User))
+ if (SDNode *Result = FindCallEndFromCallStart(User, depth))
return Result;
}
return 0;
@@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
/// FindCallStartFromCallEnd - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_START node that initiates the call sequence.
static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ int nested = 0;
assert(Node && "Didn't find callseq_start for a call??");
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return 0;
}
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
- CPIdx, PseudoSourceValue::getConstantPool(),
- 0, VT, false, false, Alignment);
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false,
+ MachinePointerInfo::getConstantPool(), false, false,
Alignment);
}
@@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
- int SVOffset = ST->getSrcValueOffset();
DebugLoc dl = ST->getDebugLoc();
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
- SVOffset, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
} else {
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, NULL, 0, StoredVT,
- false, false, 0);
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo().getWithOffset(Offset),
ST->isVolatile(), ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// Increment the pointers.
@@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
- NULL, 0, MemVT, false, false, 0);
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
@@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
- ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
- ST->getSrcValue(), SVOffset + IncrementSize,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
@@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
static
SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI) {
- int SVOffset = LD->getSrcValueOffset();
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isTypeLegal(intVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, LD->isVolatile(),
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
- } else {
- // Copy the value to a (aligned) stack slot using (unaligned) integer
- // loads and stores, then do a (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- SDValue StackPtr = StackBase;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the original location.
- SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + Offset, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
- // Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, false, false, 0));
- // Increment the pointers.
- Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- }
+ }
- // The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
- LD->getSrcValue(), SVOffset + Offset,
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
+ // Copy the value to a (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
- // On big-endian machines this requires a truncating store to ensure
- // that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, MemVT, false, false, 0));
-
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
-
- // Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
- NULL, 0, LoadedVT, false, false, 0);
-
- // Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
}
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
}
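// A concrete instance of the stack-slot path above (the sizes are only an
// example): with RegVT = i32 and a 10-byte LoadedVT, NumRegs is 3, so the
// loop issues two full i32 loads from the possibly misaligned source and
// stores them into the aligned temporary; the tail is a 2-byte EXTLOAD paired
// with a truncating store, and the final aligned load from StackBase
// reassembles the original value. ExpandUnalignedStore uses the same slot
// trick in the opposite direction.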
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
@@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// Store the vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
// Truncate or zero extend offset to target pointer type.
@@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
- PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
}
@@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
SDValue Tmp3;
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (CFP->getValueType(0) == MVT::f64) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (getTypeAction(MVT::i64) == Legal) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
          const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
@@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
- TargetLowering::LegalizeAction Action;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
switch (Node->getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
@@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case ISD::CALLSEQ_START: {
+ static int depth = 0;
SDNode *CallEnd = FindCallEndFromCallStart(Node);
// Recursively Legalize all of the inputs of the call end that do not lead
@@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Tmp1, LastCALLSEQ_END);
Tmp1 = LegalizeOp(Tmp1);
@@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+
+  SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END;
// Note that we are selecting this call!
LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
+ depth++;
// Legalize the call, starting from the CALLSEQ_END.
LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ depth--;
+ assert(depth >= 0 && "Un-matched CALLSEQ_START?");
+ if (depth > 0)
+ LastCALLSEQ_END = Saved_LastCALLSEQ_END;
return Result;
}
case ISD::CALLSEQ_END:
@@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
// Do not try to legalize the target-specific arguments (#1+), except for
// an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
@@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getResNo());
}
}
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
- IsLegalizingCall = false;
-
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
if (Node->getNumValues() == 2)
@@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Change base type to a different vector type.
EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
Tmp4 = LegalizeOp(Tmp1.getValue(1));
break;
}
@@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
AddLegalizedOperand(SDValue(Node, 0), Tmp3);
AddLegalizedOperand(SDValue(Node, 1), Tmp4);
return Op.getResNo() ? Tmp4 : Tmp3;
- } else {
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- int SVOffset = LD->getSrcValueOffset();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
+ }
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
} else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
- }
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
}
}
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
}
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
}
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
}
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case TargetLowering::Promote:
assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getSrcValue(), SVOffset, isVolatile,
+ ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
break;
}
@@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, NVT, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (TLI.isLittleEndian()) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
@@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
} else {
// Big endian - avoid unaligned stores.
@@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT, isVolatile, isNonTemporal,
- Alignment);
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
}
@@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// TRUNCSTORE:i16 i32 -> STORE i16
assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
break;
}
}
@@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
// Store the value to a temporary stack slot, then LOAD the returned part.
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Add the offset to the index.
unsigned EltSize =
@@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
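// In short: the source vector is spilled to a fresh stack temporary, the
// element address is formed as StackPtr plus Idx scaled by the element size
// (the MUL/ADD just above), and the result is either a plain vector load or
// an EXTLOAD that widens the stored element type to the scalar result type.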
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
else
- return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
- NULL, 0, Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+  Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
}
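// The INSERT_SUBVECTOR counterpart of the routine above: spill the whole
// vector, overwrite the bytes at StackPtr + Idx * EltSize with the inserted
// part, then reload the complete vector. Chaining the subvector store on the
// whole-vector store keeps both stores ordered ahead of that final load.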
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// element type, only store the bits necessary.
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
EltVT, false, false, 0));
} else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
false, false, 0));
}
@@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
}
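// Roughly: each BUILD_VECTOR operand is stored to the slot at its byte
// offset, using a truncating store when the operand is wider than the element
// type (as happens when small elements were promoted to, say, i32), the
// stores are combined with a TokenFactor, and the result is one vector-typed
// load from the slot.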
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
if (isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
// Store the float to memory, then load the sign part out as an integer.
MVT LoadTy = TLI.getPointerTy();
@@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
// Then store the float to it.
SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
false, false, 0);
if (TLI.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align > StackAlign)
SP = DAG.getNode(ISD::AND, dl, VT, SP,
DAG.getConstant(-(uint64_t)Align, VT));
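// The AND with -(uint64_t)Align assumes Align is a power of two: for
// Align == 16 the mask is ~15, which rounds the copied stack pointer down to
// a 16-byte boundary; on a downward-growing stack, rounding down is the
// direction that over-aligns the allocation.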
@@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, SlotVT, false, false, SrcAlign);
+ PtrInfo, SlotVT, false, false, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, false, false, SrcAlign);
+ PtrInfo, false, false, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
- DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
- false, false, DestAlign);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
}
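// EmitStackConvert funnels a value through memory: a truncating store when
// SrcSize exceeds SlotSize, a plain store when they match, then a plain load
// when SlotSize equals DestSize or an EXTLOAD when the destination is wider.
// The FP_ROUND/BITCAST expansion below passes the destination type as SlotVT,
// so an f64 -> f32 round becomes a truncstore of f32 followed by an f32 load
// from the same slot.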
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
Node->getValueType(0).getVectorElementType(),
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
}
@@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
}
@@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // isTailCall may be true since the callee does not reference caller stack
+ // frame. Check if it's in the right position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
@@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
@@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
}
// store the lo of the constructed double - based on integer input
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, NULL, 0,
+ Op0Mapped, Lo, MachinePointerInfo(),
false, false, 0);
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
- false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
DAG.getConstant(32, MVT::i64));
SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
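// The f64 path above is the classic 2^52 / 2^84 trick (TwoP52 and TwoP84 are
// built just before this hunk as the IEEE bit patterns of those powers):
// OR-ing the low 32 input bits into 2^52's mantissa gives exactly 2^52 + lo,
// OR-ing the high 32 bits into 2^84's mantissa gives exactly 2^84 + hi*2^32,
// and subtracting (2^84 + 2^52) then adding the halves yields hi*2^32 + lo
// with only the final FADD rounding.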
- // Implementation of unsigned i64 to f32. This implementation has the
- // advantage of performing rounding correctly.
+ // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+      //       pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
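// The !isSigned path above halves the input with a sticky bit: when the top
// bit of Op0 is set, (Op0 >> 1) | (Op0 & 1) keeps the discarded low bit ORed
// in, the signed conversion of that value then rounds the same way the full
// value would, and the FADD doubles it exactly. For example 2^64 - 1 becomes
// 0x7FFFFFFFFFFFFFFF, converts to 2^63 as f32, and doubles to 2^64, the
// correctly rounded result. Inputs with the top bit clear take the direct
// SINT_TO_FP ("Fast") operand of the SELECT.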
+
+ // Otherwise, implement the fully general conversion.
EVT SHVT = TLI.getShiftAmountTy();
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
@@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
- ISD::SETUGE);
+ ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
@@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0));
-
}
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
@@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, Alignment));
}
@@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
}
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
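// For example SplatByte(32, 0x55) doubles 0x55 -> 0x5555 -> 0x55555555 in two
// iterations; the loop needs only log2(NumBits / 8) doubling steps for the
// power-of-two widths the CTPOP expansion below feeds it.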
+
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
switch (Opc) {
default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
- static const uint64_t mask[6] = {
- 0x5555555555555555ULL, 0x3333333333333333ULL,
- 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
- 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
- };
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy();
- unsigned len = VT.getSizeInBits();
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
- //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
- unsigned EltSize = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : len;
- SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
- SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
- Tmp2));
- }
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
return Op;
}
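// Tracing the four steps on an 8-bit value v = 0b11010110 (five bits set):
//   v - ((v >> 1) & 0x55)           -> 0b10010101   (2-bit fields: 2,1,1,1)
//   (v & 0x33) + ((v >> 2) & 0x33)  -> 0b00110010   (4-bit fields: 3,2)
//   (v + (v >> 4)) & 0x0F           -> 0b00000101   (= 5)
//   (v * 0x01) >> (Len - 8)         -> 5            (a no-op for Len == 8; for
//                                                    wider types the multiply
//                                                    accumulates the byte sums
//                                                    into the top byte)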
case ISD::CTLZ: {
@@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
@@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
@@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
- // FIXME: Unimplemented for now. Add libcalls.
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
@@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::FP_ROUND:
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.getPointerTy()));
VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(-Align,
+ DAG.getConstant(-(int64_t)Align,
TLI.getPointerTy()));
}
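
The change from -Align to -(int64_t)Align in the va_arg alignment above matters because Align is a 32-bit unsigned value while the pointer may be 64 bits wide: negating the unsigned value and then widening it zero-extends, so the AND mask would clear the pointer's upper bits. A small scalar sketch of the align-up idiom and of the pitfall it avoids (illustrative only; a plain integer stands in for the pointer):

    #include <cstdint>
    #include <cassert>

    // Align p up to 'align' (a power of two); sign-extend before masking so the
    // high pointer bits survive, which is exactly what -(int64_t)Align buys above.
    static uint64_t alignUp(uint64_t p, unsigned align) {
      return (p + align - 1) & (uint64_t)-(int64_t)align;
    }

    int main() {
      assert(alignUp(0x100000001001ull, 16) == 0x100000001010ull);
      // The zero-extended mask would wrongly clear the upper 32 bits:
      assert(((0x100000001001ull + 15) & (uint64_t)(unsigned)-16) == 0x1010ull);
      return 0;
    }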
@@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
@@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), VS, 0, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
- false, false, 0);
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
break;
}
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EXTRACT_SUBVECTOR:
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
case ISD::CONCAT_VECTORS: {
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
@@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else {
- // FIXME: We should be able to fall back to a libcall with an illegal
- // type in some cases.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() * 2)) &&
- "Don't know how to expand this operation yet!");
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall for the double-width MUL, provided a
+ // wide-enough multiply libcall is available.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+
+ SDValue Ret = ExpandLibCall(LC, Node, isSigned);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
+ TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
+ DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
}
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
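
Whichever path produced them, BottomHalf and TopHalf are then compared to form the overflow bit: for an unsigned multiply the top half must be zero, and for a signed multiply it must equal the sign-spread of the bottom half. A standalone sketch of the same check done with a double-width multiply on 32-bit operands (plain C++ integers stand in for the DAG values; a hedged illustration, not the legalizer code):

    #include <cstdint>
    #include <cassert>

    // Multiply in 64 bits, split into Bottom/Top halves, then compare the top
    // half against what it must be when no overflow occurred.
    static bool umul32_overflow(uint32_t a, uint32_t b, uint32_t &lo) {
      uint64_t wide = (uint64_t)a * b;
      lo = (uint32_t)wide;                       // BottomHalf
      uint32_t hi = (uint32_t)(wide >> 32);      // TopHalf
      return hi != 0;                            // unsigned: overflow iff hi != 0
    }

    static bool smul32_overflow(int32_t a, int32_t b, int32_t &lo) {
      int64_t wide = (int64_t)a * b;
      lo = (int32_t)wide;                        // BottomHalf
      uint32_t hi = (uint32_t)((uint64_t)wide >> 32);
      // Signed: overflow iff the top half is not the sign-spread of the bottom.
      return hi != (uint32_t)(lo < 0 ? -1 : 0);
    }

    int main() {
      uint32_t ul; int32_t sl;
      assert(!umul32_overflow(1000u, 1000u, ul) && ul == 1000000u);
      assert(umul32_overflow(0x10000u, 0x10000u, ul));
      assert(!smul32_overflow(46341, -46340, sl));
      assert(smul32_overflow(46341, 46341, sl));
      return 0;
    }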
@@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, MemVT,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::XOR: {
unsigned ExtOp, TruncOp;
if (OVT.isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
ExtOp = ISD::ANY_EXTEND;
@@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else if (Node->getValueType(0).isInteger()) {
ExtOp = ISD::ANY_EXTEND;
TruncOp = ISD::TRUNCATE;
@@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
// Cast the two input vectors.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
// Convert the shuffle mask to the right # elements.
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
Results.push_back(Tmp1);
break;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 650ee5a..2775212 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP:
R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
@@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
SetSoftenedFloat(SDValue(N, ResNo), R);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
- NVT);
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
}
@@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->getPointerInfo(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
L->getMemoryVT(), dl, L->getChain(),
- L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to soften this operator's operand!");
- case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
}
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
}
@@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->getMemoryVT(), LD->isVolatile(),
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
// Remember the chain.
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f8c5890..f0752df 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
llvm_unreachable("Do not know how to promote this operator!");
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getSrcValue(), N->getAlignment());
+ Op2, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getSrcValue(), N->getAlignment());
+ Op2, Op3, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
@@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
case PromoteInteger:
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
- return DAG.getNode(ISD::BIT_CONVERT, dl,
- NOutVT, GetPromotedInteger(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
case SoftenFloat:
// Promote the integer operand by hand.
@@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
case SplitVector: {
- // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
@@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case WidenVector:
if (OutVT.bitsEq(NInVT))
// The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.set(OVT.getSizeInBits());
+ TopBit.setBit(OVT.getSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
}
@@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine whether the result overflowed, we extend both inputs to the
+ // larger type, do the multiply there, and then check the high bits of the
+ // result to see whether overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result is not the zero/sign
+ // extension of the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
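
PromoteIntRes_XMULO handles the opposite situation from the double-width expansion: the multiply is narrower than a legal type, so both operands are sign- or zero-extended to the promoted type, multiplied there, and overflow is detected by checking whether the product still fits the original width (the SIGN_EXTEND_INREG/SETNE pair above). A hedged scalar sketch for i8 promoted to i32 (narrowing conversions are shown for clarity and are implementation-defined before C++20):

    #include <cstdint>
    #include <cassert>

    // Overflow by promotion: multiply in the wider type, then test whether the
    // product survives a round trip through the original 8-bit width.
    static bool smul8_overflow(int8_t a, int8_t b, int8_t &res) {
      int32_t mul = (int32_t)a * (int32_t)b;     // multiply in the promoted type
      res = (int8_t)mul;
      // SIGN_EXTEND_INREG check: overflow iff re-extending the low 8 bits
      // does not reproduce the full product.
      return (int32_t)(int8_t)mul != mul;
    }

    static bool umul8_overflow(uint8_t a, uint8_t b, uint8_t &res) {
      uint32_t mul = (uint32_t)a * (uint32_t)b;
      res = (uint8_t)mul;
      // Unsigned check: overflow iff any bit above the low 8 is set.
      return (mul >> 8) != 0;
    }

    int main() {
      int8_t s; uint8_t u;
      assert(!smul8_overflow(11, 11, s) && s == 121);
      assert(smul8_overflow(12, 12, s));            // 144 does not fit in i8
      assert(!umul8_overflow(15, 17, u) && u == 255);
      assert(umul8_overflow(16, 16, u));            // 256 does not fit in u8
      return 0;
    }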
SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
@@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
- assert(ResNo == 1 && "Only boolean result promotion currently supported!");
- return PromoteIntRes_Overflow(N);
-}
-
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
@@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
@@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
@@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
- SVOffset, N->getMemoryVT(),
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
isVolatile, isNonTemporal, Alignment);
}
@@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
case ISD::AND:
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
@@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
+/// Lower an atomic node to the appropriate __sync_* libcall.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
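
ExpandAtomic is a two-level dispatch: the atomic opcode selects a __sync_* libcall family and the memory width selects the _1/_2/_4/_8 variant, after which ExpandChainLibCall builds the actual call. A standalone sketch of the same mapping, using the usual libgcc/compiler-rt symbol names (illustrative only, not the RTLIB table itself):

    #include <cstdio>
    #include <string>

    // First level: pick the libcall family from the operation.
    enum class AtomicOp { Swap, CmpSwap, FetchAdd, FetchSub, FetchAnd,
                          FetchOr, FetchXor, FetchNand };

    // Second level: append the byte width ("_1", "_2", "_4", "_8").
    static std::string atomicLibcallName(AtomicOp op, unsigned bytes) {
      const char *base = nullptr;
      switch (op) {
      case AtomicOp::Swap:      base = "__sync_lock_test_and_set_";     break;
      case AtomicOp::CmpSwap:   base = "__sync_val_compare_and_swap_";  break;
      case AtomicOp::FetchAdd:  base = "__sync_fetch_and_add_";         break;
      case AtomicOp::FetchSub:  base = "__sync_fetch_and_sub_";         break;
      case AtomicOp::FetchAnd:  base = "__sync_fetch_and_and_";         break;
      case AtomicOp::FetchOr:   base = "__sync_fetch_and_or_";          break;
      case AtomicOp::FetchXor:  base = "__sync_fetch_and_xor_";         break;
      case AtomicOp::FetchNand: base = "__sync_fetch_and_nand_";        break;
      }
      return std::string(base) + std::to_string(bytes);
    }

    int main() {
      // Prints "__sync_fetch_and_add_4", the i32 ATOMIC_LOAD_ADD libcall.
      std::printf("%s\n", atomicLibcallName(AtomicOp::FetchAdd, 4).c_str());
      return 0;
    }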
/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
@@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
Lo = DAG.getConstant(0, NVT);
- Hi = DAG.getNode(ISD::SHL, dl,
- NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, NVT);
Hi = InL;
@@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
TLI.isOperationLegalOrCustom(ISD::ADDC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
} else {
- Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
- Hi = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(NVTBits-Amt, ShTy)));
}
return;
@@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = DAG.getConstant(0, NVT);
Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRL, dl,
+ Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
Hi = DAG.getConstant(0, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, NVT);
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt > VTBits) {
- Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt-NVTBits, ShTy));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
}
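
ExpandShiftByConstant splits a double-width shift into operations on the two halves, with three cases per direction: shift amount above, equal to, or below the half width. A compact sketch of the SHL cases for a 64-bit value expanded into 32-bit halves (assuming 0 < Amt < 64), checked against a native 64-bit shift:

    #include <cstdint>
    #include <cassert>

    // 64-bit SHL by a constant, built from 32-bit halves as in the SHL branch above.
    static void shl64(uint32_t InL, uint32_t InH, unsigned Amt,
                      uint32_t &Lo, uint32_t &Hi) {
      const unsigned NVTBits = 32;
      if (Amt > NVTBits) {            // low half and then some shifts into the high half
        Lo = 0;
        Hi = InL << (Amt - NVTBits);
      } else if (Amt == NVTBits) {    // low half moves exactly into the high half
        Lo = 0;
        Hi = InL;
      } else {                        // general case: OR the spilled-over low bits in
        Lo = InL << Amt;
        Hi = (InH << Amt) | (InL >> (NVTBits - Amt));
      }
    }

    int main() {
      uint32_t Lo, Hi;
      uint64_t v = 0x0123456789ABCDEFull;
      for (unsigned Amt = 1; Amt < 64; ++Amt) {
        shl64((uint32_t)v, (uint32_t)(v >> 32), Amt, Lo, Hi);
        uint64_t ref = v << Amt;
        assert(Lo == (uint32_t)ref && Hi == (uint32_t)(ref >> 32));
      }
      return 0;
    }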
@@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
- // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
@@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
HiOps[2] = Lo.getValue(1);
@@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
}
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
- if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
- SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
- SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
- DAG.getConstant(1, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
- } else {
- Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
- SDValue Cmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
- LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
- }
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
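
When the target has no ADDC/ADDE, the restructured code above synthesizes the carry with SETULT/SELECT: add the low halves, recover the carry by comparing the low result against the low operands (an unsigned add wrapped iff the result is below an operand), and add the carry into the high half. A scalar sketch of a 64-bit add built from 32-bit halves in the same shape:

    #include <cstdint>
    #include <cassert>

    // Double-width ADD without a carry flag, mirroring the SETULT/SELECT chain.
    static void add64(uint32_t LHSL, uint32_t LHSH, uint32_t RHSL, uint32_t RHSH,
                      uint32_t &Lo, uint32_t &Hi) {
      Lo = LHSL + RHSL;
      Hi = LHSH + RHSH;
      uint32_t Carry1 = (Lo < LHSL) ? 1u : 0u;   // carry iff the low add wrapped
      uint32_t Carry2 = (Lo < RHSL) ? 1u : Carry1;
      Hi += Carry2;
    }

    int main() {
      uint32_t Lo, Hi;
      add64(0xFFFFFFFFu, 0u, 1u, 0u, Lo, Hi);
      assert(Lo == 0u && Hi == 1u);
      add64(0x80000000u, 1u, 0x80000000u, 2u, Lo, Hi);
      assert(Lo == 0u && Hi == 4u);
      return 0;
    }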
@@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
- Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
}
@@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ofl);
}
+void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Ret, Lo, Hi);
+
+ // Now calculate overflow.
+ SDValue Ofl;
+ if (N->getOpcode() == ISD::UMULO)
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ else {
+ SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
+ Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
+ }
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
- return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
Alignment);
- } else if (TLI.isLittleEndian()) {
+ }
+
+ if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
- isVolatile, isNonTemporal,
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- } else {
- // Big-endian - high bits are at low addresses. Favor aligned stores at
- // the cost of some bit-fiddling.
- GetExpandedInteger(N->getValue(), Lo, Hi);
-
- EVT ExtVT = N->getMemoryVT();
- unsigned EBytes = ExtVT.getStoreSize();
- unsigned IncrementSize = NVT.getSizeInBits()/8;
- unsigned ExcessBits = (EBytes - IncrementSize)*8;
- EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
- ExtVT.getSizeInBits() - ExcessBits);
+ }
- if (ExcessBits < NVT.getSizeInBits()) {
- // Transfer high bits from the top of Lo to the bottom of Hi.
- Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
- TLI.getPointerTy()));
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits,
- TLI.getPointerTy())));
- }
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
- // Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset, HiVT, isVolatile, isNonTemporal,
- Alignment);
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
- // Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- // Store the lowest ExcessBits bits in the second half.
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- }
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
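
The big-endian branch of ExpandIntOp_STORE favors an aligned first store: when the memory type is not a multiple of the half width, high bits are shifted from the top of Lo into the bottom of Hi, so the first (aligned) store carries the high HiVT bits and the second store carries only the remaining ExcessBits. A standalone sketch for an i48 store from two 32-bit halves into a big-endian byte buffer (widths and layout chosen for illustration):

    #include <cstdint>
    #include <cstring>
    #include <cassert>

    // Big-endian truncating store of an i48 value expanded into 32-bit halves.
    static void storeBE48(uint8_t *p, uint32_t Lo, uint32_t Hi) {
      const unsigned ExcessBits = 16;                   // (6 bytes - 4 bytes) * 8
      // Transfer high bits from the top of Lo to the bottom of Hi.
      Hi = (Hi << (32 - ExcessBits)) | (Lo >> ExcessBits);
      // Aligned store of the high 32 bits, big-endian, at offset 0.
      for (int i = 0; i < 4; ++i) p[i] = (uint8_t)(Hi >> (24 - 8 * i));
      // Store the lowest ExcessBits (16) bits, big-endian, at offset 4.
      p[4] = (uint8_t)(Lo >> 8);
      p[5] = (uint8_t)Lo;
    }

    int main() {
      uint8_t buf[6];
      // Value 0x112233445566: Lo holds the low 32 bits, Hi the high 16.
      storeBE48(buf, 0x33445566u, 0x00001122u);
      const uint8_t expect[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
      assert(std::memcmp(buf, expect, 6) == 0);
      return 0;
    }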
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
@@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
- FudgePtr, NULL, 0, MVT::f32,
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6e56c98..cedda7e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
if (M->getNodeId() == Processed)
RemapValue(NewVal);
DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
/// BitConvertToInteger - Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
}
@@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a libcall. Similar to ExpandLibCall
+// except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d560292..3f81bbb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -99,7 +99,7 @@ private:
return SoftenFloat;
return ExpandFloat;
}
-
+
if (VT.getVectorNumElements() == 1)
return ScalarizeVector;
return SplitVector;
@@ -192,6 +192,10 @@ private:
SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
@@ -244,7 +248,7 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
- SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
@@ -278,7 +282,7 @@ private:
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
- SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
@@ -344,6 +348,7 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -352,7 +357,7 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
@@ -387,7 +392,7 @@ private:
// Result Float to Integer Conversion.
void SoftenFloatResult(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
@@ -426,7 +431,7 @@ private:
// Operand Float to Integer Conversion.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
@@ -515,7 +520,7 @@ private:
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
- SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
@@ -532,7 +537,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
- SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -557,7 +562,7 @@ private:
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -577,11 +582,12 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
- SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -603,7 +609,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
- SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
@@ -628,7 +634,7 @@ private:
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
- SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -721,7 +727,7 @@ private:
}
// Generic Result Expansion.
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -729,7 +735,7 @@ private:
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
- SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9c2b1d9..a75ae87 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -32,8 +32,7 @@ using namespace llvm;
// little/big-endian machines, followed by the Hi/Lo part. This means that
// they cannot be used as is on vectors, for which Lo is always stored first.
-void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDValue InOp = N->getOperand(0);
@@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
case SoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ExpandInteger:
case ExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case SplitVector:
GetSplitVector(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case WidenVector: {
- assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
InVT.getVectorNumElements()/2);
@@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
}
}
if (InVT.isVector() && OutVT.isInteger()) {
- // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
if (isTypeLegal(NVT)) {
- SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
@@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the second half from the stack slot.
- Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
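
The recurring change in this hunk (and throughout the patch) is the switch from the
old (const Value *SV, int SVOffset) addressing pair to MachinePointerInfo, which
carries either an IR pointer or a fixed stack slot plus a byte offset. A condensed
before/after sketch of the pattern, assuming the surrounding variables of
ExpandRes_BITCAST:

    // Old style: pseudo source value plus a raw integer offset.
    //   const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
    //   Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize,
    //                    false, false, MinAlign(Alignment, IncrementSize));
    // New style: a MachinePointerInfo built from the frame index, with the
    // offset folded in via getWithOffset(), keeping both halves tied to the
    // same fixed stack slot.
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
    Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
                     PtrInfo.getWithOffset(IncrementSize),
                     false, false, MinAlign(Alignment, IncrementSize));
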
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
OldVec);
@@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset+IncrementSize,
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
-SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0).isVector()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
- // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
@@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
std::swap(Parts[0], Parts[1]);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
}
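
A worked instance of the comment in this hunk, as a rough sketch assuming x86,
where i64 is expanded but v2i32 is legal: the two expanded halves are packaged
into a two-element vector and the bitcast is performed on that vector instead.

    // v1i64 = BITCAST i64 %x, with i64 already expanded into Lo/Hi i32 parts.
    SDValue Parts[2];
    GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);  // Lo, Hi halves
    if (TLI.isBigEndian())
      std::swap(Parts[0], Parts[1]);      // high part goes in element 0
    SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Parts, 2);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Vec);
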
@@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
&NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
@@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
// Bitconvert to a vector of twice the length with elements of the expanded
// type, insert the expanded vector elements, and then convert back.
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
NewVecVT, N->getOperand(0));
SDValue Lo, Hi;
@@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
@@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- int SVOffset = St->getSrcValueOffset();
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
@@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
- Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
- SVOffset + IncrementSize,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
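
The store side mirrors the load side: the expanded halves are written with two
half-width stores, the Lo/Hi pair is swapped first on big-endian targets so the
in-memory byte layout matches the original wide store, and the second store reuses
the node's pointer info with the byte offset folded in. A condensed sketch with
concrete sizes (i64 expanded to two i32 halves, IncrementSize == 4); the helper
that produces Lo/Hi sits outside this hunk:

    GetExpandedOp(St->getValue(), Lo, Hi);
    if (TLI.isBigEndian())
      std::swap(Lo, Hi);                      // high half is stored first
    Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
                      isVolatile, isNonTemporal, Alignment);
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getIntPtrConstant(4));
    Hi = DAG.getStore(Chain, dl, Hi, Ptr,
                      St->getPointerInfo().getWithOffset(4),
                      isVolatile, isNonTemporal, MinAlign(Alignment, 4));
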
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 621c087..167dbe0 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93bc2d0..182f8fc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to scalarize the result of this operator!");
- case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
@@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
NewVT, N->getOperand(0));
}
@@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
@@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
- case ISD::BIT_CONVERT:
- Res = ScalarizeVecOp_BIT_CONVERT(N);
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Elt);
}
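
In other words, once a <1 x ty> operand has been scalarized it is represented by
its single element, so the bitcast collapses to an element-sized bitcast. A tiny
instance:

    // i64 = BITCAST <1 x i64> %v, where <1 x i64> is being scalarized.
    SDValue Elt = GetScalarizedVector(N->getOperand(0));   // the lone i64
    // Element and result types match here, so this BITCAST folds away and the
    // element itself is returned; otherwise a scalar-sized BITCAST is emitted.
    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Elt);
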
@@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
if (N->isTruncatingStore())
return DAG.getTruncStore(N->getChain(), dl,
GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getAlignment());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
}
@@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
@@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
-void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
@@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
GetExpandedOp(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
@@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
@@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
@@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
break;
}
}
@@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- EVT IdxVT = Idx.getValueType();
DebugLoc dl = N->getDebugLoc();
EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
- Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
- DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
@@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
false, false, 0);
// Increment the pointer to the other part.
@@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
- false, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- const Value *SV = LD->getSrcValue();
- int SVOffset = LD->getSrcValueOffset();
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
@@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
- case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
- // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
SDValue Lo, Hi;
@@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
- // We know that the extracted result type is legal. For now, assume the index
- // is a constant.
+ // We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
@@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EVT EltVT = VecVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
- SV, 0, EltVT, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
@@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
isVol, isNT, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
isVol, isNT, Alignment);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
@@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
-
+
   // The input operands all must have the same type, and we know the result
   // type is valid. Convert this to a buildvector which extracts all the
// input elements.
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
-
+
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
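
The new SplitVecOp_FP_ROUND handles the case where the rounded result type is
legal but the operand vector must be split: each half is rounded separately,
forwarding the truncation flag in operand 1, and the halves are concatenated back
together. A rough worked instance, assuming v4f32 is legal while v4f64 needs
splitting:

    // v4f32 = fp_round v4f64 %x  ==>  split %x into two v2f64 halves, round
    // each half to v2f32, then concatenate into the legal v4f32 result.
    GetSplitVector(N->getOperand(0), Lo, Hi);             // two v2f64 halves
    Lo = DAG.getNode(ISD::FP_ROUND, DL, MVT::v2f32, Lo, N->getOperand(1));
    Hi = DAG.getNode(ISD::FP_ROUND, DL, MVT::v2f32, Hi, N->getOperand(1));
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, Lo, Hi);
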
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
- case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
@@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
}
-
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
-
+
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
@@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
unsigned ConcatEnd = 0; // Current ConcatOps index.
int Idx = 0; // Current Idx into input vectors.
- // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // NumElts := greatest legal vector size (at most WidenVT)
// while (orig. vector has unhandled elements) {
// take munches of size NumElts from the beginning and add to ConcatOps
// NumElts := next smaller supported vector size or 1
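
A minimal standalone sketch of that "munch" loop, outside the DAG machinery, with
a hypothetical isLegalNumElts() predicate standing in for TLI.isTypeLegal(): start
from the widest legal chunk and drop to the next smaller supported size whenever
fewer elements than a full chunk remain.

    #include <cstdio>

    // Assumption for illustration only: 4- and 2-element vectors are legal.
    static bool isLegalNumElts(unsigned NumElts) {
      return NumElts == 4 || NumElts == 2;
    }

    static void munch(unsigned OrigElts, unsigned WidenElts) {
      unsigned NumElts = WidenElts;               // greatest candidate chunk
      while (!isLegalNumElts(NumElts) && NumElts != 1)
        NumElts /= 2;
      unsigned Idx = 0;
      while (OrigElts != 0) {
        while (NumElts > OrigElts) {              // next smaller size, or 1
          NumElts /= 2;
          while (!isLegalNumElts(NumElts) && NumElts != 1)
            NumElts /= 2;
        }
        std::printf("apply op to elements [%u, %u)\n", Idx, Idx + NumElts);
        Idx += NumElts;
        OrigElts -= NumElts;
      }
    }

    int main() { munch(6, 8); return 0; }   // chunks: [0,4), then [4,6)
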
@@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2);
@@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NextSize *= 2;
NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
+ } while (!TLI.isTypeLegal(NextVT));
if (!VT.isVector()) {
// Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
@@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
if (VT == WidenVT)
return ConcatOps[0];
}
-
+
// add undefs of size MaxVT until ConcatOps grows to length of WidenVT
unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
@@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts)
- return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
- &Ops[0], NumConcat));
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
}
if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
// Extract the input and convert the shorten input vector.
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
- InOp, DAG.getIntPtrConstant(0)));
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
}
}
@@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
- for (i=0; i < MinElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getIntPtrConstant(i)));
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
}
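
The repeated "one operand vs. two operands" checks above exist because some of the
conversions widened here (notably FP_ROUND) carry an extra flag operand that has
to be forwarded. A hypothetical helper capturing the pattern, not part of the
patch itself:

    static SDValue GetConvertedNode(SelectionDAG &DAG, SDNode *N, DebugLoc DL,
                                    EVT VT, SDValue In) {
      // Forward the optional second operand (e.g. FP_ROUND's truncation flag).
      if (N->getNumOperands() == 1)
        return DAG.getNode(N->getOpcode(), DL, VT, In);
      return DAG.getNode(N->getOpcode(), DL, VT, In, N->getOperand(1));
    }
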
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
-SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
@@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case SoftenFloat:
case ExpandInteger:
@@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widen value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
- if (WidenSize % InSize == 0) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine new input vector type. The new input vector type will use
     // the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeSynthesizable(NewInVT)) {
+ if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
else
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
NewInVT, &Ops[0], NewNumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SatOp, CvtCode);
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT InVT = InOp.getValueType();
- ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- // Check if we can just return the input vector after widening.
- if (IdxVal == 0 && InVT == WidenVT)
- return InOp;
-
- // Check if we can extract from the vector.
- unsigned InNumElts = InVT.getVectorNumElements();
- if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
- }
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = Idx.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- for (i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal+i, IdxVT));
- } else {
- Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
- for (i=1; i < NumElts; ++i) {
- SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- DAG.getConstant(i, IdxVT));
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
- }
- }
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
- case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
@@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
-SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
@@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
- if (InWidenSize % Size == 0 && !VT.isVector()) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeSynthesizable(NewVT)) {
- SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
}
@@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store
+ // See if there is larger legal integer than the element type to load/store
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust position and vector position based on new load type
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
@@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getIntPtrConstant(Idx++));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
- LoadSDNode * LD) {
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
// The strategy assumes that we can efficiently load powers of two widths.
  // The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
@@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Align);
LdChain.push_back(LdOp.getValue(1));
@@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
@@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
NewVTWidth = NewVT.getSizeInBits();
}
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
- SVOffset+Offset, isVolatile,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
isNonTemporal, MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
LdOps.push_back(LdOp);
@@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
-
+
// If the load contains vectors, build the vector using concat vector.
  // All of the vector loads have power-of-2 widths, and the scalar loads
  // can be combined to make a power-of-2 vector.
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile,
- isNonTemporal, Align);
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getIntPtrConstant(Idx));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal,
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
@@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// Cast the vector to the scalar type we can store
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal, MinAlign(Align, Offset)));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
-
+
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
@@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset, StEltVT,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
BasePtr, DAG.getIntPtrConstant(Offset));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
- SVOffset + Offset, StEltVT,
- isVolatile, isNonTemporal,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ac2d338..2dcb229 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index fae2729..e3da208 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
+ if (SU->getNode()->getGluedNode())
return NULL;
SDNode *N = SU->getNode();
@@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 56f5ded..430283d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
tdListDAGScheduler("list-td", "Top-down list scheduler",
createTDListDAGScheduler);
-
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation. This supports
@@ -51,7 +51,7 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
SchedulingPriorityQueue *AvailableQueue;
-
+
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands become available, the instruction is
@@ -63,11 +63,12 @@ private:
public:
ScheduleDAGList(MachineFunction &mf,
- SchedulingPriorityQueue *availqueue,
- ScheduleHazardRecognizer *HR)
- : ScheduleDAGSDNodes(mf),
- AvailableQueue(availqueue), HazardRec(HR) {
- }
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGList() {
delete HazardRec;
@@ -87,14 +88,14 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
-
+
// Build the scheduling graph.
BuildSchedGraph(NULL);
AvailableQueue->initNodes(SUnits);
-
+
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
-
+
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
@@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
@@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
-
+
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
@@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
SUnit *FoundSUnit = 0;
-
+
bool HasNoopHazards = false;
while (!AvailableQueue->empty()) {
SUnit *CurSUnit = AvailableQueue->pop();
-
+
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
}
-
+
// Remember if this is a noop hazard.
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-
+
NotReady.push_back(CurSUnit);
}
-
+
// Add the nodes that aren't ready back onto the available list.
if (!NotReady.empty()) {
AvailableQueue->push_all(NotReady);
@@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If this is a pseudo-op node, we don't want to increment the current
// cycle.
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
+ ++CurCycle;
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
@@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler with a
-/// new hazard recognizer. This scheduler takes ownership of the hazard
-/// recognizer and deletes it when done.
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF,
- new LatencyPriorityQueue(),
- IS->CreateTargetHazardRecognizer());
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
}
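
The constructor change above inverts the ownership: instead of the caller building
a hazard recognizer and handing it in, ScheduleDAGList now asks the target for one
via TargetInstrInfo::CreateTargetHazardRecognizer and deletes it itself. A rough
sketch of what a hypothetical target hook could look like under that assumption:

    // Hypothetical target override; falling back to the default recognizer
    // reproduces the old "no hazards" behaviour.
    ScheduleHazardRecognizer *
    MyTargetInstrInfo::CreateTargetHazardRecognizer(const TargetMachine *TM,
                                                    const ScheduleDAG *DAG) const {
      return new ScheduleHazardRecognizer();
    }
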
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4c3e4e3..0b548b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -20,6 +20,7 @@
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,6 +66,10 @@ static RegisterScheduler
"which tries to balance ILP and register pressure",
createILPListDAGScheduler);
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -83,31 +88,56 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
   /// modify the registers can be scheduled.
unsigned NumLiveRegs;
std::vector<SUnit*> LiveRegDefs;
- std::vector<unsigned> LiveRegCycles;
+ std::vector<SUnit*> LiveRegGens;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
public:
- ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup, bool needlatency,
- SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
- AvailableQueue(availqueue), Topo(SUnits) {
- }
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGRRList() {
+ delete HazardRec;
delete AvailableQueue;
}
void Schedule();
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
/// IsReachable - Checks if SU is reachable from TargetSU.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +166,37 @@ public:
}
private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
void ReleasePred(SUnit *SU, const SDep *PredEdge);
- void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleasePredecessors(SUnit *SU);
void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
void CapturePred(SDep *PredEdge);
- void ScheduleNodeBottomUp(SUnit*, unsigned);
- void ScheduleNodeTopDown(SUnit*, unsigned);
void UnscheduleNodeBottomUp(SUnit*);
- void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
const TargetRegisterClass*,
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
- void ListScheduleTopDown();
+
+ SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
@@ -190,11 +233,13 @@ private:
void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
- << " **********\n");
+ << " '" << BB->getName() << "' **********\n");
+ CurCycle = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegCycles.resize(TRI->getNumRegs(), 0);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
-
+
+ HazardRec->Reset();
+
// Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
if (isBottomUp)
ListScheduleBottomUp();
else
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
PredSU->isAvailable = true;
- AvailableQueue->push(PredSU);
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(SU)) {
+ AvailableQueue->push(PredSU);
+ }
+    // CapturePred and others may have left the node in the pending queue; avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
}
}
-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
// Bottom up: release predecessors
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- if (!LiveRegDefs[I->getReg()]) {
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
++NumLiveRegs;
- LiveRegDefs[I->getReg()] = I->getSUnit();
- LiveRegCycles[I->getReg()] = CurCycle;
+ LiveRegGens[I->getReg()] = SU;
}
}
}
}
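
The live-range bookkeeping described in the comment above can be seen in isolation with a minimal, self-contained C++ sketch (illustrative only, not part of this patch): the register number and node IDs are made up, and plain integers stand in for SUnit pointers.

#include <cassert>
#include <map>

int main() {
  const unsigned FlagsReg = 1;          // hypothetical physical register
  std::map<unsigned, int> LiveRegDefs;  // reg -> farthest (bottom-up) def in the chain
  std::map<unsigned, int> LiveRegGens;  // reg -> first user released bottom-up

  // Bottom-up, node (1) is scheduled first; releasing its flags predecessor
  // edge records the def (2) and the generating use (1).
  LiveRegDefs[FlagsReg] = 2;
  if (!LiveRegGens.count(FlagsReg)) LiveRegGens[FlagsReg] = 1;

  // Releasing node (2): the def is overwritten with (3) but the gen is kept,
  // so the whole two-address chain forms one live range.
  LiveRegDefs[FlagsReg] = 3;
  if (!LiveRegGens.count(FlagsReg)) LiveRegGens[FlagsReg] = 2;  // not taken

  assert(LiveRegDefs[FlagsReg] == 3 && LiveRegGens[FlagsReg] == 1);
  return 0;
}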
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif
- // FIXME: Handle noop hazard.
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node, its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
Sequence.push_back(SU);
AvailableQueue->ScheduledNode(SU);
- ReleasePredecessors(SU, CurCycle);
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
// Release all the implicit physical register defs that are live.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
- assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == SU &&
- "Physical register dependency violated?");
- --NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
- }
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
}
}
SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, count each inst as one cycle.
+ if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
+ || AvailableQueue->empty())
+ AdvanceToCycle(CurCycle + 1);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredSU->isAvailable) {
PredSU->isAvailable = false;
@@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
+ LiveRegGens[I->getReg()] = NULL;
}
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[I->getReg()] = SU;
if (!LiveRegDefs[I->getReg()]) {
- LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs;
}
- if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
- LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
}
}
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
- AvailableQueue->push(SU);
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
AvailableQueue->UnscheduledNode(SU);
}
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
- unsigned &CurCycle) {
- SUnit *OldSU = NULL;
- while (CurCycle > BtCycle) {
- OldSU = Sequence.back();
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
Sequence.pop_back();
if (SU->isSucc(OldSU))
// Don't try to remove SU from AvailableQueue.
SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
- --CurCycle;
AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (SUNode->isOperandOf(N))
return true;
}
@@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
- return NULL;
-
SDNode *N = SU->getNode();
if (!N)
return NULL;
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
@@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
@@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+    // Check if Reg is live.
+ if (!LiveRegDefs[Reg]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[Reg] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(Reg))
LRegs.push_back(Reg);
- Added = true;
- }
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
- Added = true;
- }
- }
- return Added;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
- SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (NumLiveRegs == 0)
return false;
SmallSet<unsigned, 4> RegAdded;
// If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (I->isAssignedRegDep())
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
+
return !LRegs.empty();
}
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+      // If one or more successors have been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try
+    // duplicating the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is null, then it must be possible to copy
+    // the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
- unsigned CurCycle = 0;
-
// Release any predecessors of the special Exit node.
- ReleasePredecessors(&ExitSU, CurCycle);
+ ReleasePredecessors(&ExitSU);
// Add root to Available queue.
if (!SUnits.empty()) {
@@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
- SmallVector<SUnit*, 4> NotReady;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- bool Delayed = false;
- LRegsMap.clear();
- SUnit *CurSU = AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- Delayed = true;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ DEBUG(dbgs() << "\n*** Examining Available\n";
+ AvailableQueue->dump(this));
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- NotReady.push_back(CurSU);
- CurSU = AvailableQueue->pop();
- }
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
- // All candidates are delayed due to live physical reg dependencies.
- // Try backtracking, code duplication, or inserting cross class copies
- // to resolve it.
- if (Delayed && !CurSU) {
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- SUnit *TrySU = NotReady[i];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-
- // Try unscheduling up to the point where it's safe to schedule
- // this node.
- unsigned LiveCycle = CurCycle;
- for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
- unsigned Reg = LRegs[j];
- unsigned LCycle = LiveRegCycles[Reg];
- LiveCycle = std::min(LiveCycle, LCycle);
- }
- SUnit *OldSU = Sequence[LiveCycle];
- if (!WillCreateCycle(TrySU, OldSU)) {
- BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
- // Force the current node to be scheduled before the node that
- // requires the physical reg dep.
- if (OldSU->isAvailable) {
- OldSU->isAvailable = false;
- AvailableQueue->remove(OldSU);
- }
- AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
- // If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable)
- CurSU = AvailableQueue->pop();
- else {
- CurSU = TrySU;
- TrySU->isPending = false;
- NotReady.erase(NotReady.begin()+i);
- }
- break;
- }
- }
+ AdvancePastStalls(SU);
- if (!CurSU) {
- // Can't backtrack. If it's too expensive to copy the value, then try
- // duplicate the nodes that produces these "too expensive to copy"
- // values to break the dependency. In case even that doesn't work,
- // insert cross class copies.
- // If it's not too expensive, i.e. cost != -1, issue copies.
- SUnit *TrySU = NotReady[0];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
- assert(LRegs.size() == 1 && "Can't handle this yet!");
- unsigned Reg = LRegs[0];
- SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
- SUnit *NewDef = 0;
- if (DestRC)
- NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
- if (!NewDef) {
- // Issue copies, these can be expensive cross register class copies.
- SmallVector<SUnit*, 2> Copies;
- InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- NewDef = Copies.back();
- }
+ ScheduleNodeBottomUp(SU);
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
- LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- TrySU->isAvailable = false;
- CurSU = NewDef;
- }
-
- assert(CurSU && "Unable to resolve live physical register dependencies!");
- }
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- NotReady[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (NotReady[i]->isAvailable)
- AvailableQueue->push(NotReady[i]);
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
- NotReady.clear();
-
- if (CurSU)
- ScheduleNodeBottomUp(CurSU, CurCycle);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
std::reverse(Sequence.begin(), Sequence.end());
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
- unsigned CurCycle = 0;
AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
@@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
SUnit *CurSU = AvailableQueue->pop();
-
+
if (CurSU)
- ScheduleNodeTopDown(CurSU, CurCycle);
+ ScheduleNodeTopDown(CurSU);
++CurCycle;
AvailableQueue->setCurCycle(CurCycle);
}
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Implementation
+// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
-//
+//
namespace {
- template<class SF>
- class RegReductionPriorityQueue;
-
- /// bu_ls_rr_sort - Priority function for bottom up register pressure
- // reduction scheduler.
- struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
- bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
- bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // td_ls_rr_sort - Priority function for top down register pressure reduction
- // scheduler.
- struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
- td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
};
- // src_ls_rr_sort - Priority function for source order scheduler.
- struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
- src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
- : SPQ(spq) {}
- src_ls_rr_sort(const src_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
- hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
- : SPQ(spq) {}
- hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
- bool operator()(const SUnit* left, const SUnit* right) const;
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
- // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
- // scheduler.
- struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
- ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
- : SPQ(spq) {}
- ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
- bool operator()(const SUnit* left, const SUnit* right) const;
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
-} // end anonymous namespace
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU);
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+ }
+
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
@@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
if (SethiUllmanNumber == 0)
SethiUllmanNumber = 1;
-
+
return SethiUllmanNumber;
}
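
For readers unfamiliar with Sethi-Ullman numbering, the classic labeling on a plain binary expression tree is a useful reference point; the scheduler's version above generalizes it to DAG nodes with arbitrary predecessor counts and extra heuristics. The following standalone C++ sketch is illustrative only and makes up its own tree type.

#include <algorithm>
#include <cassert>

struct Expr {
  const Expr *L = nullptr, *R = nullptr;  // both null for a leaf
};

// A leaf needs one register; an interior node needs the larger of its
// children's labels, or one more when both children need the same amount.
unsigned sethiUllman(const Expr *E) {
  if (!E->L && !E->R)
    return 1;
  unsigned l = sethiUllman(E->L), r = sethiUllman(E->R);
  return l == r ? l + 1 : std::max(l, r);
}

int main() {
  Expr a, b, c, d;
  Expr ab{&a, &b}, cd{&c, &d};
  Expr root{&ab, &cd};
  assert(sethiUllman(&root) == 3);  // (a+b)+(c+d) needs three registers
  return 0;
}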
-namespace {
- template<class SF>
- class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- std::vector<SUnit*> Queue;
- SF Picker;
- unsigned CurQueueId;
- bool TracksRegPressure;
-
- protected:
- // SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
-
- MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
-
- // SethiUllmanNumbers - The SethiUllman number for each node.
- std::vector<unsigned> SethiUllmanNumbers;
-
- /// RegPressure - Tracking current reg pressure per register class.
- ///
- std::vector<unsigned> RegPressure;
-
- /// RegLimit - Tracking the number of allocatable registers per register
- /// class.
- std::vector<unsigned> RegLimit;
-
- public:
- RegReductionPriorityQueue(MachineFunction &mf,
- bool tracksrp,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- const TargetLowering *tli)
- : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
- if (TracksRegPressure) {
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
- }
- }
-
- void initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
- }
-
- void addNode(const SUnit *SU) {
- unsigned SUSize = SethiUllmanNumbers.size();
- if (SUnits->size() > SUSize)
- SethiUllmanNumbers.resize(SUSize*2, 0);
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
-
- void updateNode(const SUnit *SU) {
- SethiUllmanNumbers[SU->NodeNum] = 0;
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
- void releaseState() {
- SUnits = 0;
- SethiUllmanNumbers.clear();
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- }
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
- unsigned getNodePriority(const SUnit *SU) const {
- assert(SU->NodeNum < SethiUllmanNumbers.size());
- unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
- if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
- // CopyToReg should be close to its uses to facilitate coalescing and
- // avoid spilling.
- return 0;
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::INSERT_SUBREG)
- // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
- // close to their uses to facilitate coalescing.
- return 0;
- if (SU->NumSuccs == 0 && SU->NumPreds != 0)
- // If SU does not have a register use, i.e. it doesn't produce a value
- // that would be consumed (e.g. store), then it terminates a chain of
- // computation. Give it a large SethiUllman number so it will be
- // scheduled right before its predecessors that it doesn't lengthen
- // their live ranges.
- return 0xffff;
- if (SU->NumPreds == 0 && SU->NumSuccs != 0)
- // If SU does not have a register def, schedule it close to its uses
- // because it does not lengthen any live ranges.
- return 0;
- return SethiUllmanNumbers[SU->NodeNum];
- }
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+}
- unsigned getNodeOrdering(const SUnit *SU) const {
- return scheduleDAG->DAG->GetOrdering(SU->getNode());
- }
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- bool empty() const { return Queue.empty(); }
-
- void push(SUnit *U) {
- assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++CurQueueId;
- Queue.push_back(U);
- }
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- SUnit *pop() {
- if (empty()) return NULL;
- std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
- E = Queue.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Queue.end()))
- std::swap(*Best, Queue.back());
- Queue.pop_back();
- V->NodeQueueId = 0;
- return V;
- }
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors, so that it doesn't lengthen
+    // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+}
- void remove(SUnit *SU) {
- assert(!Queue.empty() && "Queue is empty!");
- assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
- if (I != prior(Queue.end()))
- std::swap(*I, Queue.back());
- Queue.pop_back();
- SU->NodeQueueId = 0;
- }
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
- bool HighRegPressure(const SUnit *SU) const {
- if (!TLI)
- return false;
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] >= RegLimit[RCId])
- return true; // Reg pressure already high.
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- }
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
- return false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
}
+ }
+ return false;
+}
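
The pressure test above reduces to a single comparison per register class: would adding this value's cost push the class's current pressure to or past its allocatable limit. A minimal C++ sketch (illustrative only; the class IDs, costs, and limits are invented):

#include <cassert>
#include <vector>

// Mirrors the (RegPressure[RCId] + Cost) >= RegLimit[RCId] check above.
bool wouldExceed(const std::vector<unsigned> &Pressure,
                 const std::vector<unsigned> &Limit,
                 unsigned RCId, unsigned Cost) {
  return Pressure[RCId] + Cost >= Limit[RCId];
}

int main() {
  std::vector<unsigned> Pressure = {6, 2};  // current pressure per class
  std::vector<unsigned> Limit    = {8, 8};  // allocatable registers per class
  assert(wouldExceed(Pressure, Limit, 0, 2));   // 6 + 2 >= 8: high pressure
  assert(!wouldExceed(Pressure, Limit, 1, 2));  // 2 + 2 <  8: fine
  return 0;
}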
- void ScheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+ const SDNode *N = SU->getNode();
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- }
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
- dumpRegPressure();
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. The can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ break;
}
+ }
- void UnscheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ dumpRegPressure();
+}
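
The key property of this accounting is symmetry: the pressure added when a use of a predecessor's def is scheduled must match what UnscheduledNode later removes when the scheduler backtracks, or the counters drift and get clamped to zero. A toy model of that increase/decrease discipline, assuming a single unit cost per def and hypothetical names:

#include <cassert>
#include <vector>

// Toy bottom-up pressure model: scheduling a use makes a predecessor's def
// live (pressure goes up); scheduling the def itself ends the interval
// (pressure goes down). Backtracking must apply the exact inverse.
struct ToyPressure {
  std::vector<int> P;
  explicit ToyPressure(unsigned NumClasses) : P(NumClasses, 0) {}

  void scheduleUseOf(unsigned RC)   { ++P[RC]; }
  void scheduleDefOf(unsigned RC)   { P[RC] = P[RC] > 0 ? P[RC] - 1 : 0; }
  void unscheduleDefOf(unsigned RC) { ++P[RC]; }
  void unscheduleUseOf(unsigned RC) { P[RC] = P[RC] > 0 ? P[RC] - 1 : 0; }
};

int main() {
  ToyPressure T(1);
  T.scheduleUseOf(0);      // a consumer is scheduled: def becomes live
  T.scheduleDefOf(0);      // the def is scheduled: interval closed
  T.unscheduleDefOf(0);    // backtrack in the opposite order
  T.unscheduleUseOf(0);
  assert(T.P[0] == 0);     // counters return to the starting state
  return 0;
}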
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
- continue;
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
-
- dumpRegPressure();
+ continue;
}
-
- void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
- scheduleDAG = scheduleDag;
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
}
-
- void dumpRegPressure() const {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
- unsigned Id = RC->getID();
- unsigned RP = RegPressure[Id];
- if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
- }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
+ }
- protected:
- bool canClobber(const SUnit *SU, const SUnit *Op);
- void AddPseudoTwoAddrDeps();
- void PrescheduleNodesWithMultipleUses();
- void CalculateSethiUllmanNumbers();
- };
-
- typedef RegReductionPriorityQueue<bu_ls_rr_sort>
- BURegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<td_ls_rr_sort>
- TDRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<src_ls_rr_sort>
- SrcRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
- HybridBURRPriorityQueue;
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
- typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
- ILPBURRPriorityQueue;
+ dumpRegPressure();
}
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
@@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
- const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveOutUses - Return true if every value successor of SU is a
+/// CopyToReg to a virtual register. Such a def is probably a liveout with
+/// no other use, so it should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
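
In other words, the predicate only fires when the node has at least one data successor and every data successor copies the value into a virtual register. A self-contained sketch of the same shape of check, using a made-up successor record instead of SDep/SUnit:

#include <cassert>
#include <vector>

// Illustrative successor record: is it a chain/control edge, and is the
// consumer a copy into a virtual register? (Names are hypothetical.)
struct Succ { bool IsCtrl; bool IsCopyToVirtReg; };

// True iff there is at least one data successor and every data successor
// is a copy to a virtual register -- i.e. the value only feeds live-outs.
static bool hasOnlyLiveOutUses(const std::vector<Succ> &Succs) {
  bool SawDataUse = false;
  for (const Succ &S : Succs) {
    if (S.IsCtrl) continue;            // ignore chain/control edges
    if (!S.IsCopyToVirtReg) return false;
    SawDataUse = true;
  }
  return SawDataUse;
}

int main() {
  assert(hasOnlyLiveOutUses({{false, true}, {true, false}}));
  assert(!hasOnlyLiveOutUses({{false, true}, {false, false}}));
  assert(!hasOnlyLiveOutUses({{true, false}}));  // no data uses at all
  return 0;
}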
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+ SmallSet<const SUnit*, 4> Preds;
+ for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Preds.insert(I->getSUnit());
+ }
+ for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (Preds.count(I->getSUnit()))
+ return true;
+ }
+ return false;
+}
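
The shared-predecessor test is just a set intersection over the data preds of the two nodes. A standalone sketch, with plain ints standing in for SUnit pointers:

#include <cassert>
#include <unordered_set>
#include <vector>

// Two nodes "share a predecessor" if the intersection of their data-pred
// sets is non-empty: hash one side once, then probe with the other.
static bool unitsSharePred(const std::vector<int> &LeftPreds,
                           const std::vector<int> &RightPreds) {
  std::unordered_set<int> Seen(LeftPreds.begin(), LeftPreds.end());
  for (int P : RightPreds)
    if (Seen.count(P))
      return true;
  return false;
}

int main() {
  assert(unitsSharePred({1, 2, 3}, {5, 3}));
  assert(!unitsSharePred({1, 2}, {4, 5}));
  return 0;
}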
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
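
So a node stalls either because the current cycle has not reached its height (its operands are not ready yet) or because the hazard recognizer reports a resource conflict. A reduced sketch with the hazard modelled as a plain flag:

#include <cassert>

// A node "stalls" if its critical-path height has not been reached yet, or a
// structural hazard (a plain flag here) blocks issue this cycle.
static bool hasStall(int CurCycle, int Height, bool HazardThisCycle) {
  if (CurCycle < Height) return true;       // operands not ready yet
  return HazardThisCycle;                   // functional-unit conflict
}

int main() {
  assert(hasStall(2, 5, false));   // latency stall
  assert(hasStall(5, 5, true));    // resource stall
  assert(!hasStall(7, 5, false));  // ready and no hazard
  return 0;
}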
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // If the two nodes share an operand and one of them has a single
+ // use that is a live out copy, favor the one that is live out. Otherwise
+ // it will be difficult to eliminate the copy if the instruction is a
+ // loop induction variable update. e.g.
+ // BB:
+ // sub r1, r3, #1
+ // str r0, [r2, r3]
+ // mov r3, r1
+ // cmp
+ // bne BB
+ bool SharePred = UnitsSharePred(left, right);
+ // FIXME: Only adjust if BB is a loop back edge.
+ // FIXME: What's the cost of a copy?
+ int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+ int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+ int LHeight = (int)left->getHeight() - LBonus;
+ int RHeight = (int)right->getHeight() - RBonus;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling either of them will cause a pipeline stall, sort them
+ // according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ else {
+ // If neither instruction stalls (!LStall && !RStall) then its height is
+ // already covered, so only its depth matters. We also reach this point if
+ // both stall but have the same height.
+ unsigned LDepth = left->getDepth();
+ unsigned RDepth = right->getDepth();
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
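
The tie-break chain can be summarised as: a node that would stall loses; if both stall, compare heights; otherwise compare depth, then raw latency. A hedged transliteration of that ordering with plain ints, using the same sign convention as the comment above (-1: left first, +1: right first); the checkPref/DisableSchedCycles plumbing is omitted:

#include <cassert>

static int compareLatency(bool LStall, bool RStall,
                          int LHeight, int RHeight,
                          int LDepth, int RDepth,
                          int LLatency, int RLatency) {
  // A node that would stall is delayed; if both stall, prefer by height.
  if (LStall) {
    if (!RStall) return 1;
    if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // Otherwise fall back to depth, then raw latency.
  if (LDepth != RDepth) return LDepth < RDepth ? 1 : -1;
  if (LLatency != RLatency) return LLatency > RLatency ? 1 : -1;
  return 0;
}

int main() {
  assert(compareLatency(true, false, 0,0, 0,0, 0,0) == 1);   // left stalls
  assert(compareLatency(false, true, 0,0, 0,0, 0,0) == -1);  // right stalls
  assert(compareLatency(false, false, 0,0, 3,5, 0,0) == 1);  // depths differ
  assert(compareLatency(false, false, 0,0, 2,2, 2,2) == 0);  // equivalent
  return 0;
}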
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
if (LPriority != RPriority)
@@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,
if (LScratch != RScratch)
return LScratch > RScratch;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
+ if (!DisableSchedCycles) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
- assert(left->NodeQueueId && right->NodeQueueId &&
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
return BURRSort(left, right, SPQ);
}
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
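
The heuristic admits a node to the available queue only if it is within a small fixed look-ahead of the current cycle, except that anything which may reduce register pressure is always admitted. A sketch of the gating rule with the hazard check left out and an illustrative delay constant:

#include <cassert>

// Ready-queue gating in the spirit of the hybrid scheduler: hold a node back
// while its height is more than a small fixed delay beyond the current cycle,
// unless letting it through could relieve register pressure.
static bool isReadyHybrid(unsigned Height, unsigned CurCycle,
                          bool MayReducePressure) {
  const unsigned ReadyDelay = 3;
  if (MayReducePressure) return true;         // always allow pressure relief
  return Height <= CurCycle + ReadyDelay;     // within the look-ahead window
}

int main() {
  assert(isReadyHybrid(10, 2, true));   // admitted to relieve pressure
  assert(!isReadyHybrid(10, 2, false)); // 10 > 2 + 3: keep it pending
  assert(isReadyHybrid(5, 2, false));   // 5 <= 2 + 3: ready enough
  return 0;
}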
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
- if (LHigh && !RHigh)
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
- else if (!LHigh && RHigh)
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
+ }
else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule for latency if possible.
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort
- // them according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
- return false;
-
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
- }
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
}
-
return BURRSort(left, right, SPQ);
}
-bool ilp_ls_rr_sort::operator()(const SUnit *left,
- const SUnit *right) const {
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
@@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left,
return BURRSort(left, right, SPQ);
}
-template<class SF>
-bool
-RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
return false;
}
-/// hasCopyToRegUse - Return true if SU has a value successor that is a
-/// CopyToReg node.
-static bool hasCopyToRegUse(const SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *SuccSU = I->getSUnit();
- if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
- return true;
- }
- return false;
-}
-
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
@@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
const unsigned *SUImpDefs =
@@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
+ if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
@@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
-template<class SF>
-void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
@@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
- // like other nodes from the perspective of scheduling // heuristics.
+ // like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
@@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
-template<class SF>
-void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (!SU->isTwoAddress)
continue;
SDNode *Node = SU->getNode();
- if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
continue;
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
unsigned Opc = Node->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
unsigned NumRes = TID.getNumDefs();
@@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
if ((!canClobber(SuccSU, DUSU) ||
- (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
@@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
}
}
-/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
-/// scheduling units.
-template<class SF>
-void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
- SethiUllmanNumbers.assign(SUnits->size(), 0);
-
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
-}
-
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
unsigned Limit) {
unsigned Sum = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
- assert(left->NodeQueueId && right->NodeQueueId &&
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
TDRegReductionPriorityQueue *PQ =
new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f1bf82a..477c1ff 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -34,8 +34,8 @@ using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf) {
-}
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
+ SU->isCall = Old->isCall;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
+ const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
}
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
- SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- SDNode *FlagDestNode = Flag.getNode();
+ SDNode *GlueDestNode = Glue.getNode();
- // Don't add a flag from a node to itself.
- if (FlagDestNode == N) return;
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
- // Don't add a flag to something which already has a flag.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
- if (AddFlag)
- VTs.push_back(MVT::Flag);
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
- if (FlagDestNode)
- Ops.push_back(Flag);
+ if (GlueDestNode)
+ Ops.push_back(Glue);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
MN->setMemRefs(Begin, End);
}
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (NumLoads == 0)
return;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensure they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0, 0), true, DAG);
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
- bool OutFlag = I < E - 1;
+ bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
- AddFlags(Load, InFlag, OutFlag, DAG);
+ AddGlue(Load, InGlue, OutGlue, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues() - 1);
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
@@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// FIXME: Multiply by 2 because we may clone nodes during scheduling.
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
-
+
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
-
+
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
-
+
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
if (Visited.insert(NI->getOperand(i).getNode()))
Worklist.push_back(NI->getOperand(i).getNode());
-
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
-
+
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
-
+
SUnit *NodeSUnit = NewSUnit(NI);
-
- // See if anything is flagged to this node, if so, add them to flagged
- // nodes. Nodes can have at most one flag input and one flag output. Flags
- // are required to be the last operand and result of a node.
-
- // Scan up to find flagged preds.
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
SDNode *N = NI;
while (N->getNumOperands() &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
N = N->getOperand(N->getNumOperands()-1).getNode();
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
}
-
- // Scan down to find any flagged succs.
+
+ // Scan down to find any glued succs.
N = NI;
- while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
- SDValue FlagVal(N, N->getNumValues()-1);
-
- // There are either zero or one users of the Flag result.
- bool HasFlagUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
UI != E; ++UI)
- if (FlagVal.isOperandOf(*UI)) {
- HasFlagUse = true;
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
break;
}
- if (!HasFlagUse) break;
+ if (!HasGlueUse) break;
}
-
- // If there are flag operands involved, N is now the bottom-most node
- // of the sequence of nodes that are flagged together.
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
// Update the SUnit.
NodeSUnit->setNode(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
@@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
SDNode *MainNode = SU->getNode();
-
+
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (TID.isCommutable())
SU->isCommutable = true;
}
-
+
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
@@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
SU->hasPhysRegDefs = true;
}
-
+
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
if (isPassiveNode(OpN)) continue; // Not scheduled.
@@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (OpSU == SU) continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
- assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
@@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- SU->addPred(dep);
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ --OpSU->NumRegDefsLeft;
+ }
}
}
}
@@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
-/// flagged together nodes with a single SUnit.
+/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
@@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
AddSchedEdges();
}
+// Initialize NodeNumDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+ // Propagate the incoming (full-register) type. I doubt it's needed.
+ ValueType = Node->getOperand(0).getValueType();
+ }
+ else {
+ ValueType = Node->getValueType(DefIdx);
+ }
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
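
RegDefIter is an in-place iterator: it walks the chain of glued nodes and yields only the results that actually have uses. A standalone sketch of the same walk over a made-up node type (ToyNode and its fields are hypothetical, not the SelectionDAG API):

#include <cassert>
#include <vector>

// Minimal stand-in for a chain of glued nodes: each node lists which of its
// results actually have uses. The iterator advances across nodes in place.
struct ToyNode {
  std::vector<bool> ResultHasUse;
  const ToyNode *Glued;             // next node glued to this one, or null
};

class DefIter {
  const ToyNode *Node;
  unsigned Idx = 0;
public:
  explicit DefIter(const ToyNode *N) : Node(N) { skipToValid(); }
  bool isValid() const { return Node != nullptr; }
  unsigned index() const { return Idx; }
  void advance() { ++Idx; skipToValid(); }
private:
  void skipToValid() {
    while (Node) {
      for (; Idx < Node->ResultHasUse.size(); ++Idx)
        if (Node->ResultHasUse[Idx])
          return;                   // found a live def
      Node = Node->Glued;           // move on to the next glued node
      Idx = 0;
    }
  }
};

int main() {
  ToyNode B{{true}, nullptr};
  ToyNode A{{false, true}, &B};     // result 0 dead, result 1 live
  unsigned Count = 0;
  for (DefIter I(&A); I.isValid(); I.advance())
    ++Count;
  assert(Count == 2);               // one live def in A, one in B
  return 0;
}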
+
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
@@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
return;
}
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty()) {
+ if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
return;
}
-
+
// Compute the latency for the node. We use the sum of the latencies for
- // all nodes flagged together into this SUnit.
+ // all nodes glued together into this SUnit.
SU->Latency = 0;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- if (N->isMachineOpcode()) {
- SU->Latency += InstrItins.
- getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
- }
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
}
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
@@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
if (ForceUnitLatencies())
return;
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
- return;
-
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
- int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
- if (DefCycle < 0)
- return;
- int UseCycle = 1;
- if (Use->isMachineOpcode()) {
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
- }
- if (UseCycle >= 0) {
- int Latency = DefCycle - UseCycle + 1;
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
}
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
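
The new path delegates the base def-to-use latency to the target and then relaxes it by one cycle when the use is a CopyToReg of a virtual register in a block that has successors, on the assumption that the copy will be coalesced. A sketch of just that adjustment with illustrative inputs:

#include <cassert>

// Start from a target-provided def->use latency and knock one cycle off when
// the use is a copy to a virtual register that leaves the block.
static int adjustOperandLatency(int Latency, bool UseIsCopyToVirtReg,
                                bool BlockHasSuccessors) {
  if (Latency > 1 && UseIsCopyToVirtReg && BlockHasSuccessors)
    return Latency - 1;
  return Latency;
}

int main() {
  assert(adjustOperandLatency(3, true, true) == 2);   // live-out copy: relax
  assert(adjustOperandLatency(3, false, true) == 3);  // ordinary use
  assert(adjustOperandLatency(1, true, true) == 1);   // never drop below 1
  return 0;
}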
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
SU->getNode()->dump(DAG);
dbgs() << "\n";
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
dbgs() << " ";
- FlaggedNodes.back()->dump(DAG);
+ GluedNodes.back()->dump(DAG);
dbgs() << "\n";
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
}
@@ -507,37 +573,25 @@ namespace {
};
}
-// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule uses to determine how to insert dbg_value
-// instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
- InstrEmitter &Emitter,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
- SmallSet<unsigned, 8> &Seen) {
- unsigned Order = DAG->GetOrdering(N);
- if (!Order || !Seen.insert(Order))
- return;
-
- MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
- // Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
- return;
- }
-
- Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
if (!N->getHasDebugValue())
return;
+
// Opportunistically insert immediate dbg_value uses, i.e. those with source
// order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (DVOrder == ++Order) {
+ if (!Order || DVOrder == ++Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
}
}
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if the node does not have an order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
@@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// For pre-regalloc scheduling, create instructions corresponding to the
- // SDNode and any flagged SDNodes and append them to the block.
+ // SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap);
continue;
}
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
- N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
- SDNode *N = FlaggedNodes.back();
- Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
@@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
-#ifndef NDEBUG
- unsigned LastDIOrder = 0;
-#endif
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
-#ifndef NDEBUG
- assert((*DI)->getOrder() >= LastDIOrder &&
- "SDDbgValue nodes must be in source order!");
- LastDIOrder = (*DI)->getOrder();
-#endif
if ((*DI)->isInvalidated())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 842fc8c..cc7310e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,13 +20,13 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
- ///
+ ///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
/// interesting to schedulers are not allocated SUnits.
///
- /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// SDNodes with MVT::Glue operands are grouped along with the glued
/// nodes into a single SUnit so that they are scheduled together.
///
/// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
@@ -36,6 +36,7 @@ namespace llvm {
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
explicit ScheduleDAGSDNodes(MachineFunction &mf);
@@ -72,13 +73,17 @@ namespace llvm {
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
-
+
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@@ -105,6 +110,30 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ad06ebd..2fb2f2d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) {
if (N->getOperand(0).getOpcode() == ISD::UNDEF)
return false;
unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
for (unsigned i = 1; i < NumElems; ++i) {
SDValue V = N->getOperand(i);
if (V.getOpcode() != ISD::UNDEF)
@@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
- if (N->getValueType(0) == MVT::Flag)
+ if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
@@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
- if (N->getValueType(i) == MVT::Flag)
+ if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
@@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
- case ISD::EntryToken:
- llvm_unreachable("EntryToken should not be in CSEMaps!");
- return false;
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
@@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
default:
// Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
@@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
- if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
@@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
return Node;
}
-/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
-void SelectionDAG::VerifyNode(SDNode *N) {
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
@@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) {
}
}
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
@@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
ID.AddPointer(MD);
-
+
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Also compute a conserative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clear();
+ KnownOne.clearAllBits();
unsigned TrailZ = KnownZero.countTrailingOnes() +
KnownZero2.countTrailingOnes();
unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clear();
- KnownZero2.clear();
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
ComputeMaskedBits(Op.getOperand(1),
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit.zext(BitWidth);
+ InSignBit = InSignBit.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
@@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clear();
+ KnownOne.clearAllBits();
return;
}
case ISD::LOAD: {
@@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
return;
}
@@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded. Temporarily set this bit in the mask for our callee.
if (NewBits.getBoolValue())
InMask |= InSignBit;
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
@@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign bit wasn't actually demanded by our caller, we don't
// want it set in the KnownZero and KnownOne result values. Reset the
// mask and reapply it to the result values.
- InMask = Mask;
- InMask.trunc(InBits);
+ InMask = Mask.trunc(InBits);
KnownZero &= InMask;
KnownOne &= InMask;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
// If the sign bit is known zero or one, the top bits match.
if (SignBitKnownZero)
@@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
return;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.zext(InBits);
- KnownZero.zext(InBits);
- KnownOne.zext(InBits);
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
break;
}
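(Editorial note, not part of the patch: the hunks above and below adapt to the LLVM APInt API change in which trunc/zext/sext became value-returning rather than resizing in place, so every call site now has to reassign the result. A minimal standalone illustration, with widths chosen arbitrarily:

#include "llvm/ADT/APInt.h"
using llvm::APInt;

void resizeExample() {
  APInt V(32, 0xFF);
  V = V.trunc(8);   // old API narrowed V in place; new API returns the narrowed value
  V = V.zext(32);   // likewise, zero-extension back to 32 bits must be reassigned
}
)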
case ISD::AssertZext: {
@@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
}
// fall through
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
@@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then report to the caller that the low bit is unknown but that the other
+ // low bits are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
return;
}
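(Editorial note, not part of the patch: a standalone sketch of the ADDE rule above. A plain ADD of two operands whose low KnownZeroOut bits are clear keeps those bits zero, but ADDE may carry into bit 0, so only bits [1, KnownZeroOut) can be reported as known zero. The helper name is hypothetical, not an LLVM API; it assumes KnownZeroOut <= BitWidth.

#include "llvm/ADT/APInt.h"
using llvm::APInt;

APInt addeKnownZeroBits(unsigned BitWidth, unsigned KnownZeroOut) {
  if (KnownZeroOut >= 2)
    return APInt::getBitsSet(BitWidth, 1, KnownZeroOut); // bit 0 stays unknown
  return APInt(BitWidth, 0); // the carry may disturb everything we knew
}
)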
case ISD::SREM:
@@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
return;
}
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
default:
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
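(Editorial note, not part of the patch: a quick standalone check of the OR/ADD equivalence this helper relies on. When the constant's set bits do not overlap the base, OR and ADD compute the same address; the concrete values are illustrative only.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000; // low bits known zero, e.g. an aligned frame address
  uint64_t Cst  = 0x8;    // offset that only occupies those known-zero bits
  assert((Base & Cst) == 0);
  assert((Base | Cst) == Base + Cst); // both yield 0x1008
  return 0;
}
)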
+
+
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (NoNaNsFPMath)
@@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- const uint64_t zero[] = {0, 0};
// No compile time operations on ppcf128.
if (VT == MVT::ppcf128) break;
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(Val.bitsToFloat(), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
APInt api(VT.getSizeInBits(), 2, x);
return getConstant(api, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
@@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return Operand.getNode()->getOperand(0);
}
break;
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BIT_CONVERT between types of different sizes!");
+ && "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
@@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
@@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
return getConstant(ShiftedVal.trunc(ElementSize), VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR:
- if (N1.getValueType() == VT) // Trivial extraction.
- return N1;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
break;
}
+ }
if (N1C) {
if (N2C) {
@@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
@@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
- case ISD::BIT_CONVERT:
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
// Fold bit_convert nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
@@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
@@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
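(Editorial note, not part of the patch: a standalone rendering of the SplatByte doubling trick on a plain integer, assuming NumBits is a power of two between 8 and 64. The function name is lowercase here to make clear it is an illustration, not the patch's helper.

#include <cstdint>

uint64_t splatByte(unsigned NumBits, uint8_t ByteVal) {
  uint64_t Val = ByteVal;
  unsigned Shift = 8;
  for (unsigned i = NumBits; i > 8; i >>= 1) {
    Val = (Val << Shift) | Val; // each pass doubles the replicated width
    Shift <<= 1;
  }
  return Val; // e.g. splatByte(32, 0xAB) == 0xABABABAB
}
)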
+
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = APInt(NumBits, C->getZExtValue() & 255);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Value = DAG.getNode(ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Value,
- DAG.getConstant(Shift,
- TLI.getShiftAmountTy())),
- Value);
- Shift <<= 1;
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;
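(Editorial note, not part of the patch: the multiplication above works because multiplying a value in the range 0..255 by the 0x0101...01 pattern replicates it into every byte without carries. A standalone check with illustrative values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Byte  = 0x42;       // any value that fits in one byte
  uint32_t Magic = 0x01010101; // SplatByte(32, 0x01)
  assert(Byte * Magic == 0x42424242u);
  return 0;
}
)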
@@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
@@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT.bitsGT(LVT))
VT = LVT;
}
-
- // If we're optimizing for size, and there is a limit, bump the maximum number
- // of operations inserted down to 4. This is a wild guess that approximates
- // the size of a call to memcpy or memset (3 arguments + call).
- if (Limit != ~0U) {
- const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
- }
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than a potentially huge number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
-
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, VT, isVol, false,
- Align);
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool NonScalarIntSafe =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
NonScalarIntSafe, false, DAG, TLI))
return SDValue();
@@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, 0);
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
- isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ true, DstPtrInfo, SrcPtrInfo);
}
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
@@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstSV, DstSVOff);
+ Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff);
+ DstPtrInfo);
if (Result.getNode())
return Result;
- // Emit a library call.
+ // Emit a library call.
const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, const Value* PtrVal,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
}
@@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
@@ -3785,7 +3914,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
DebugLoc dl) {
if (NumOps == 1)
@@ -3803,18 +3931,18 @@ SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
- MemVT, srcValue, SVOff, Align, Vol,
+ MemVT, PtrInfo, Align, Vol,
ReadMem, WriteMem);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
if (Align == 0) // Ensure that codegen never sees alignment 0
@@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(srcValue, Flags, SVOff,
- MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
@@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
EVT MemVT, MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
return SDValue(N, 0);
}
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
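(Editorial note, not part of the patch: a hedged sketch of how callers benefit from this inference. Passing an empty MachinePointerInfo() to getLoad on a frame-index address now lets the DAG fill in the fixed-stack pointer info itself; the wrapper name and the Chain/FIPtr values are purely illustrative.

#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue loadFromFrameIndex(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Chain, SDValue FIPtr) {
  return DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
                     MachinePointerInfo(), // empty: inferred as fixed-stack info
                     false /*isVolatile*/, false /*isNonTemporal*/,
                     0 /*Alignment*/, 0 /*TBAAInfo*/);
}
)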
+
+
SDValue
SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
@@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- const Value *SV, int SVOffset,
+ MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
- const Value *SV,
- int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
- LD->getChain(), Base, Offset, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV, int SVOffset,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- Val.getValueType().getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV,
- int SVOffset, EVT SVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
void *IP = 0;
@@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
// Memoize the node unless it returns a flag.
SDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
}
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
unsigned NumOps) {
// If an identical node already exists, use it.
void *IP = 0;
- if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
const SDValue *Ops, unsigned NumOps) {
- bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
- void *IP;
+ void *IP = 0;
if (DoCSE) {
FoldingSetNodeID ID;
@@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifyMachineNode(N);
#endif
return N;
}
@@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
const SDValue *Ops, unsigned NumOps) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
SD->setHasDebugValue(true);
}
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps, EVT memvt,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
@@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
namespace {
struct EVTArray {
std::vector<EVT> VTs;
-
+
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
@@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
- assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
@@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const {
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
-/// side-effecting instructions. In practice, this looks through token
-/// factors and non-volatile loads. In order to remain efficient, this only
-/// looks a couple of nodes in, it does not do an exhaustive search.
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
@@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF reach dest, then we can do the xform.
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
if (getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return true;
- return false;
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
}
// Loads don't have side effects, look through them.
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
@@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BIT_CONVERT: return "bit_convert";
+ case ISD::BITCAST: return "bit_convert";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- if (G && R->getReg() &&
- TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
- OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
- } else {
- OS << " %reg" << R->getReg();
- }
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
OS << ", " << AM;
-
+
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<" << *M->getMemOperand() << ">";
@@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
- unsigned indent)
+ unsigned indent)
{
if (depth == 0)
return;
@@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
unsigned depth) const {
printrWithDepthHelper(OS, this, G, depth, 0);
-}
+}
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
@@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 100);
-}
+}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const {
if (LD->getChain() != Base->getChain())
return false;
@@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
@@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
@@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
@@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
while (sz > 8) {
unsigned HalfSize = sz / 2;
- APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
- APInt LowValue = APInt(SplatValue).trunc(HalfSize);
- APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
- APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
@@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N,
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
-
+
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
@@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N,
errs() << "Detected cycle in SelectionDAG\n";
abort();
}
-
+
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
-
+
Checked.insert(N);
Visited.erase(N);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e657445..452f561 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,6 +15,7 @@
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -43,9 +44,8 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static cl::opt<unsigned>
+MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
+ cl::init(64), cl::Hidden);
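(Editorial note, not part of the patch: because this is a cl::opt, the chain-width limit can be lowered from the command line when investigating compile-time blowups, e.g. llc -dag-chain-limit=32 input.ll. The flag name comes from the declaration above; the exact tool invocation is illustrative, and hidden options are accepted even though they do not appear in -help.)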
+
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
EVT PartVT, EVT ValueVT);
-
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
-
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
@@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
@@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
@@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
-
+
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT, RegisterVT;
@@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT == Parts[0].getValueType() &&
"Part type doesn't match part!");
-
+
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
@@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT);
}
-
+
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
Val = DAG.getNode(IntermediateVT.isVector() ?
ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
ValueVT, &Ops[0], NumIntermediates);
}
-
+
// There is now one part, held in Val. Correct it to match ValueVT.
PartVT = Val.getValueType();
-
+
if (PartVT == ValueVT)
return Val;
-
+
if (PartVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
@@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
- }
-
+ }
+
// Vector/Vector bitcast.
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
-
+
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
"Only trivial scalar-to-vector conversions should get here!");
@@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT);
-
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
@@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
-
+
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
-
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Unknown mismatch!");
+ "Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
assert(PartVT.isInteger() && ValueVT.isInteger() &&
@@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
@@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
ThisVT, Part0, DAG.getIntPtrConstant(0));
if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
@@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
+
if (NumParts == 1) {
if (PartVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
@@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
ElementVT, Val, DAG.getIntPtrConstant(i)));
-
+
for (unsigned i = ValueVT.getVectorNumElements(),
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
@@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
// FIXME: Use CONCAT for 2x -> 4x.
-
+
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else {
@@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
}
-
+
Parts[0] = Val;
return;
}
-
+
// Handle a multi-element vector.
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
@@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
IntermediateVT,
NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
-
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
-
+
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
@@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Chain = P.getValue(1);
+ Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
- const FunctionLoweringInfo::LiveOutInfo &LOI =
- FuncInfo.LiveOutRegInfo[SlotNo];
-
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector() ||
+ !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo.LiveOutRegInfo[Regs[Part+i]];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
- Parts[i] = P;
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
}
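As a concrete (made-up) instance of the ladder above: for a 32-bit virtual register whose LiveOutInfo records at least 24 leading known-zero bits and no stronger sign-bit fact, the NumZeroBits >= RegSize-8 arm is the first to match, so the copied value is wrapped in an AssertZext from i8; with 25 or more known sign bits the earlier NumSignBits > RegSize-8 arm would instead attach an AssertSext from i8.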
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
@@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
Val.getResNo(), Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
- } else {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
- }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
}
// Otherwise create a new SDValue and remember it.
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- Add, NULL, Offsets[i], false, false, 0);
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
}
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
-
+
return true;
}
@@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (BOp->hasOneUse() &&
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // Insert the false branch.
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
+  // Insert the false branch. Do this even if it's a fall-through branch;
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
Sub, DAG.getConstant(B.Range, VT),
ISD::SETUGT);
- SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
- TLI.getPointerTy());
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+        // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
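To see what the width check above rejects, take an illustrative i8 switch value: a case mask of 0x1C0 needs nine bits, so (int64_t)0x1C0 >> 8 is 1 and 1 + 1 >= 2, which forces the pointer type; a mask of 0xC0 shifts down to 0 (and a mask whose bits above the type width are all ones shifts to -1, which wraps to 0 after the +1), so those still fit in i8 and the original type is kept.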
- B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
- B.Reg, ShiftOp);
+ B.Reg, Sub);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
/// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
- TLI.getPointerTy());
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
SDValue Cmp;
if (CountPopulation_64(B.Mask) == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(ShiftOp.getValueType()),
+ TLI.getSetCCResultType(VT),
ShiftOp,
- DAG.getConstant(CountTrailingZeros_64(B.Mask),
- TLI.getPointerTy()),
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
ISD::SETEQ);
} else {
// Make desired shift
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
ISD::SETNE);
}
@@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
- // TODO: If any two of the cases has the same destination, and if one value
+  // If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+      // Check that the two values differ in only one bit.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ SwitchBB->addSuccessor(Small.BB);
+ SwitchBB->addSuccessor(Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
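A minimal standalone sketch of the new two-case merge, using the values from the comment (X == 4 or X == 6); the function name and values are illustrative only, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Small = 4 (0b100) and Big = 6 (0b110) differ in exactly one bit, so
    // CommonBit = Big & ~Small = 0b010 and a single compare covers both cases.
    static bool matchesEither(uint32_t X) {
      const uint32_t Small = 4, Big = 6;
      const uint32_t CommonBit = Big & ~Small;
      return (X | CommonBit) == Big;   // true only for X == 4 and X == 6
    }

    int main() {
      assert(matchesEither(4) && matchesEither(6));
      assert(!matchesEither(5) && !matchesEither(7));
      return 0;
    }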
// Rearrange the case blocks so that the last one falls through if possible.
if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
- APInt LastExt(Last), FirstExt(First);
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
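The rewrite presumably follows the APInt interface change in which sext and zext became value-returning const methods instead of in-place mutators; the two temporaries are now initialized directly from the one-bit-wider extensions, and the computed range itself is unchanged.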
@@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == SwitchBB),
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
if (CR.CaseBB == SwitchBB)
@@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
MachineBasicBlock* nextBB = J->BB;
@@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
return numCmps;
}
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
@@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
- if (Ty->isVectorTy()) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
- const VectorType *DestTy = cast<VectorType>(I.getType());
- const Type *ElTy = DestTy->getElementType();
- unsigned VL = DestTy->getNumElements();
- std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
- Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
- if (CV == CNZ) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
- }
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
visitBinary(I, ISD::FSUB);
}
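The unified check handles the scalar and vector forms of the idiom in one place; illustratively, both fsub float -0.000000e+00, %x and its <4 x float> analogue with a splat of -0.0 now compare equal to ConstantFP::getZeroValueForNegation(Ty) (constants are uniqued, so pointer equality suffices) and are lowered to an FNEG node, where the vector case previously needed the hand-rolled ConstantVector comparison that is deleted here.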
@@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!I.getType()->isVectorTy() &&
- Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+ MVT ShiftTy = TLI.getShiftAmountTy();
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
// If the operand is smaller than the shift count type, promote it.
- EVT PTy = TLI.getPointerTy();
- EVT STy = TLI.getShiftAmountTy();
- if (STy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (STy.getSizeInBits() >=
- Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
- // Otherwise we'll need to temporarily settle for some other
- // convenient type; type legalization will make adjustments as
- // needed.
- else if (PTy.bitsLT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
- else if (PTy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
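As a worked example of the coercion above (the types are only illustrative): with a target shift-amount type of i8 and an i32 shift amount, ShiftSize is 8 and Log2_32_Ceil(32) is 5, so the amount is truncated to i8 up front; any count the truncation could change would already be an over-wide shift, so nothing meaningful is lost.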
setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
@@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
EVT DestVT = TLI.getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
- // either a BIT_CONVERT or a no-op.
+ // either a BITCAST or a no-op.
if (DestVT != N.getValueType())
- setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
DestVT, N)); // convert types.
else
setValue(&I, N); // noop cast.
@@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
} else {
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts < SrcNumElts)
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
}
@@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile())
+ if (I.isVolatile() || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(SV)) {
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
- for (unsigned i = 0; i != NumValues; ++i) {
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // Serializing loads here may result in excessive register pressure, and
+    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+    // could recover a bit by hoisting nodes upward in the chain by recognizing
+    // that they are side-effect free or do not alias. The optimizer should
+    // really avoid this case by converting large object/array copies to
+    // llvm.memcpy (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- A, SV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
Values[i] = L;
- Chains[i] = L.getValue(1);
+ Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, &Chains[0], ChainI);
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Ptr = getValue(PtrV);
SDValue Root = getRoot();
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
-
- for (unsigned i = 0; i != NumValues; ++i) {
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
- Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
- SDValue(Src.getNode(), Src.getResNo() + i),
- Add, PtrV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
- }
-
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
@@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic)
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
@@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
VTs, &Ops[0], Ops.size(),
- Info.memVT, Info.ptrVal, Info.offset,
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
Info.readMem, Info.writeMem);
} else if (!HasChain) {
@@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
- Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
}
setValue(&I, Result);
@@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
@@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
// Add the exponent into the result in integer domain.
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
// Add the exponent into the result in integer domain.
SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
MVT::i32, t13);
// Add the exponent into the result in integer domain.
SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
}
} else {
// No special expansion.
@@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset,
+ int64_t Offset,
const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
// Ignore inlined function arguments here.
DIVariable DV(Variable);
if (DV.isInlinedFnArgument(MF.getFunction()))
@@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (Arg->hasByValAttr()) {
// Byval arguments' frame index is recorded during argument lowering.
// Use this info directly.
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
Reg = TRI->getFrameRegister(MF);
Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+      // If the byval argument offset is not recorded then ignore this.
+ if (!Offset)
+ Reg = 0;
}
if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
@@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
if (!Reg) {
+ // Check if ValueMap has reg number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI == FuncInfo.ValueMap.end())
- return false;
- Reg = VMI->second;
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
}
- const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ if (!Reg && N.getNode()) {
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
TII->get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
@@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
@@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::memset: {
@@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0));
+ MachinePointerInfo(I.getArgOperand(0))));
return 0;
}
case Intrinsic::memmove: {
@@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getZExtValue();
- if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
- AliasAnalysis::NoAlias) {
- DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getArgOperand(0), 0,
- I.getArgOperand(1), 0));
- return 0;
- }
-
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::dbg_declare: {
@@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- SDDbgValue*SDV =
- DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
}
@@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgValue *SDV;
if (N.getNode()) {
// Parameters are handled specially.
- bool isParameter =
+ bool isParameter =
DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
@@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Byval parameter. We have a frame index at this point.
SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet. This might be a
// parameter of a callee function that got inlined, for example.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
} else if (AI)
SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
- // If Address is an arugment then try to emits its dbg value using
- // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
- // to help track missing debug info.
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+        // If the variable is pinned by an alloca in a dominating bb then
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
return 0;
@@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else if (isa<PHINode>(V) && !V->use_empty() ) {
+ } else if (!V->use_empty() ) {
// Do not call getValue(V) yet, as we don't want to generate code.
// Remember it for later.
DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
DanglingDebugInfoMap[V] = DDI;
} else {
// We may expand this to cover more cases. One case where we have no
- // data available is an unreferenced parameter; we need this fallback.
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
@@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-
+
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
@@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
- getRoot(),
- getValue(I.getArgOperand(0))));
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+    // The vector shift intrinsics with scalars use 32-bit shift amounts but
+    // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
+ // to be zero.
+ // We must do this early because v2i32 is not a legal type.
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
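The zeroing matters because the hardware reads the count as a full 64-bit quantity; to illustrate with made-up values, a count of 3 sitting below a stale upper word of 1 would be read as 0x0000000100000003, a wildly out-of-range count, whereas building the v2i32 pair {3, 0} and bitcasting it keeps the count at 3.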
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Store the stack protector onto the stack.
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
- PseudoSourceValue::getFixedStack(FI),
- 0, true, false, 0);
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
return 0;
@@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[4];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
- DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 4,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
return 0;
}
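For reference, the rw operand decoded above normally originates from the second argument of the prefetch builtin; a minimal C++ snippet of the kind that reaches this code (illustrative, relying on the GCC/Clang __builtin_prefetch builtin, which lowers to llvm.prefetch):

    // Second argument selects read (0) or write (1); third is the locality hint.
    void warm(const char *p) {
      __builtin_prefetch(p, 0, 3);   // read prefetch, maximum temporal locality
    }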
-
case Intrinsic::memory_barrier: {
SDValue Ops[6];
Ops[0] = getRoot();
@@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)),
- I.getArgOperand(0));
+ MachinePointerInfo(I.getArgOperand(0)));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
FTy->isVarArg(), Outs, FTy->getContext());
SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
if (!CanLowerReturn) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
- DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.isSExt = false;
@@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add, NULL, Offsets[i], false, false, 1);
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
-
+
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
SmallVector<EVT, 4> RetTys;
@@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
EVT VT = RetTys[I];
EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
+
SDValue ReturnValue =
getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
@@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
- Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+ Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
false /*nontemporal*/, 1 /* align=1 */);
@@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
visitInlineAsm(&I);
return;
}
-
+
+  // See if any floating-point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ const FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
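A hedged illustration of what the scan above is looking for (the snippet is not from the patch): any variadic call that receives a floating-point argument should set the flag, and the type walk also catches floats reached through pointer or aggregate types.

    #include <cstdio>

    void report(double ratio) {
      // A double passed to a varargs function; on Windows this is the kind of
      // call that requires the fltused reference mentioned in the comment above.
      std::printf("ratio = %f\n", ratio);
    }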
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
@@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
}
-
+
SDValue Callee;
if (!RenameFn)
Callee = getValue(I.getCalledValue());
@@ -5008,7 +5226,7 @@ public:
/// contains the set of register corresponding to the operand.
RegsForValue AssignedRegs;
- explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
@@ -5083,6 +5301,8 @@ private:
}
};
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
} // end llvm namespace.
/// isAllocatableRegister - If the specified register is safe to allocate,
@@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// vector types).
EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// machine.
RegVT = EVT::getIntegerVT(Context,
OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
@@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
/// ConstraintOperands - Information about all of the constraints.
- std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+ SDISelAsmOperandInfoVector ConstraintOperands;
std::set<unsigned> OutputRegs, InputRegs;
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-
- SDValue Chain, Flag;
-
- // We won't need to flush pending loads if this asm doesn't touch
- // memory and is nonvolatile.
- if (hasMemory || IA->hasSideEffects())
- Chain = getRoot();
- else
- Chain = DAG.getRoot();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+   TLI.ParseConstraints(CS);
+ bool hasMemory = false;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
EVT OpVT = MVT::Other;
@@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this is an input or an indirect output, process the call argument.
// BasicBlocks are labels, currently appearing only in asm's.
if (OpInfo.CallOperandVal) {
- // Strip bitcasts, if any. This mostly comes up for functions.
- OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
@@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.ConstraintVT = OpVT;
+
+ // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
}
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
// If this is an output operand with a matching input operand, look up the
@@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-
+
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
- assert(OpInfo.Type == InlineAsm::isInput &&
+ assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value. If we don't have
@@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
- OpInfo.CallOperand, StackSlot, NULL, 0,
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
}
- ConstraintInfos.clear();
-
// Second pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
@@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the AlignStack bit as operand 3.
- AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
- MVT::i1));
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
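Operand 3 of the INLINEASM node now carries a small bitfield rather than a single align-stack boolean. A minimal sketch of packing and querying such a field, assuming the two Extra_* flags are the single-bit masks 1 and 2 (assumed values, mirroring how they are OR'ed together above):

namespace sketch {
enum ExtraInfoFlags : unsigned {
  ExtraHasSideEffects = 1u << 0,   // assumed value of InlineAsm::Extra_HasSideEffects
  ExtraIsAlignStack   = 1u << 1    // assumed value of InlineAsm::Extra_IsAlignStack
};

inline unsigned packExtraInfo(bool SideEffects, bool AlignStack) {
  unsigned Info = 0;
  if (SideEffects) Info |= ExtraHasSideEffects;
  if (AlignStack)  Info |= ExtraIsAlignStack;
  return Info;
}

inline bool extraHasSideEffects(unsigned Info) { return Info & ExtraHasSideEffects; }
inline bool extraIsAlignStack(unsigned Info)   { return Info & ExtraIsAlignStack; }
} // namespace sketch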
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
" don't know how to handle tied "
"indirect register inputs");
}
-
+
RegsForValue MatchedRegs;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
@@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG, AsmNodeOperands);
break;
}
-
+
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
@@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
- OpInfo.isIndirect)
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
-
+
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
@@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
- DAG.getVTList(MVT::Other, MVT::Flag),
+ DAG.getVTList(MVT::Other, MVT::Glue),
&AsmNodeOperands[0], AsmNodeOperands.size());
Flag = Chain.getValue(1);
@@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// not have the same VT as was expected. Convert it to the right type
// with bit_convert.
if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
ResultType, Val);
} else if (ResultType != Val.getValueType() &&
@@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
StoresToEmit[i].first,
getValue(StoresToEmit[i].second),
- StoresToEmit[i].second, 0,
+ MachinePointerInfo(StoresToEmit[i].second),
false, false, 0);
OutChains.push_back(Val);
}
@@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
+ MyFlags.VT = RegisterVT.getSimpleVT();
MyFlags.Used = isReturnValueUsed;
if (RetSExt)
MyFlags.Flags.setSExt();
@@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
@@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
@@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index for byval arguments.
if (I->hasByValAttr() && !ArgValues.empty())
- if (FrameIndexSDNode *FI =
+ if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f400e9..a1a70c3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -258,15 +258,16 @@ private:
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, bool E,
+ unsigned Rg, EVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
- First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
Parent(P), Default(D), Cases(C) { }
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
+ EVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
@@ -347,7 +348,7 @@ public:
SDValue getControlRoot();
DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-
+ void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
@@ -398,6 +399,10 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
private:
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -431,7 +436,8 @@ public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(MachineBasicBlock* NextMBB,
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 66cb5ce..62ebc81 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -53,8 +54,17 @@
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+STATISTIC(NumBBWithOutOfOrderLineInfo,
+ "Number of blocks with out of order line number info");
+STATISTIC(NumMBBWithOutOfOrderLineInfo,
+ "Number of machine blocks with out of order line number info");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
- DAGSize(0)
-{}
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ }
SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
@@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
static bool FunctionCallsSetJmp(const Function *F) {
const Module *M = F->getParent();
static const char *ReturnsTwiceFns[] = {
+ "_setjmp",
"setjmp",
"sigsetjmp",
"setjmp_syscall",
@@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) {
#undef NUM_RETURNS_TWICE_FNS
}
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
+/// may trap on it. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop for blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
+
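The split above is taken only when the incoming edge is critical. Stated on its own, an edge is critical when its source block has more than one successor and its destination has more than one predecessor; since a block containing a PHI necessarily has multiple predecessors, the loop above only has to test the predecessor's successor count. A trivial sketch of the general test (illustrative names, not the LLVM API):

namespace sketch {
// An edge Pred -> Succ is "critical" when code can be hoisted into neither
// endpoint: Pred has other successors and Succ has other predecessors.
inline bool isCriticalEdge(unsigned NumPredSuccessors, unsigned NumSuccPredecessors) {
  return NumPredSuccessors > 1 && NumSuccPredecessors > 1;
}
} // namespace sketch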
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
SDB->init(GFI, *AA);
@@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
+ if (LI->second)
LiveInMap.insert(std::make_pair(LI->first, LI->second));
// Insert DBG_VALUE instructions for function arguments to the entry block.
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (LDI != LiveInMap.end()) {
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
+ const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
unsigned Offset = MI->getOperand(1).getImm();
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
TII.get(TargetOpcode::DBG_VALUE))
.addReg(LDI->second, RegState::Debug)
.addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instructions also need DBG_VALUE, if it is the only
+ // that COPY instruction also needs a DBG_VALUE, if it is the only
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
}
}
@@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
- // Operand 1 of an inline asm instruction indicates whether the asm
- // needs stack or not.
- if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
- (TID.isCall() && !TID.isReturn())) {
+ if ((TID.isCall() && !TID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
goto done;
}
@@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
+ return;
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
// Only install this information if it tells us something.
if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
- DestReg -= TargetRegisterInfo::FirstVirtualRegister;
- if (DestReg >= FuncInfo->LiveOutRegInfo.size())
- FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+ FuncInfo->LiveOutRegInfo.grow(DestReg);
FunctionLoweringInfo::LiveOutInfo &LOI =
FuncInfo->LiveOutRegInfo[DestReg];
LOI.NumSignBits = NumSignBits;
@@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- FuncInfo->MBB = Scheduler->EmitSchedule();
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
FuncInfo->InsertPt = Scheduler->InsertPos;
}
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
// Free the scheduler state.
{
NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
@@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection begins:\n");
PreprocessISelDAG();
-
+
// Select target instructions for the DAG.
{
// Number all nodes with a topological order and set DAGSize.
DAGSize = CurDAG->AssignTopologicalOrder();
-
+
// Create a dummy node (which is not added to allnodes), that adds
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
++ISelPosition;
-
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and preceding back toward the beginning (the entry
@@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() {
// makes it theoretically possible to disable the DAGCombiner.
if (Node->use_empty())
continue;
-
+
SDNode *ResNode = Select(Node);
-
+
// FIXME: This is pretty gross. 'Select' should be changed to not return
// anything at all and this code should be nuked with a tactical strike.
-
+
// If node should not be replaced, continue with the next one.
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
if (ResNode)
ReplaceUses(Node, ResNode);
-
+
// If after the replacement this node is not used any more,
// remove this dead node.
if (Node->use_empty()) { // Don't delete EntryToken, etc.
@@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->RemoveDeadNode(Node, &ISU);
}
}
-
+
CurDAG->setRoot(Dummy.getValue());
- }
+ }
DEBUG(errs() << "===== Instruction selection ends:\n");
@@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() {
}
}
+
+
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ FastISel *FastIS) {
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level), then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result; in
+ // either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
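The "exactly one machine-level use" test above is the usual advance-once-and-check-end iterator idiom; written in isolation over any forward iterator range it is simply:

namespace sketch {
template <class Iter>
bool hasSingleElement(Iter I, Iter E) {
  return I != E && ++I == E;   // non-empty, and the second position is already end()
}
} // namespace sketch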
+#ifndef NDEBUG
+/// CheckLineNumbers - Check if basic block instructions follow source order
+/// or not.
+static void CheckLineNumbers(const BasicBlock *BB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (BasicBlock::const_iterator BI = BB->begin(),
+ BE = BB->end(); BI != BE; ++BI) {
+ const DebugLoc DL = BI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+
+/// CheckLineNumbers - Check if machine basic block instructions follow source
+/// order or not.
+static void CheckLineNumbers(const MachineBasicBlock *MBB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
+ MBE = MBB->end(); MBI != MBE; ++MBI) {
+ const DebugLoc DL = MBI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumMBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+#endif
+
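Both checkers above use the same lexicographic (line, column) comparison to decide whether debug locations ever step backwards within a block; as a standalone predicate the ordering test is:

namespace sketch {
// True if location (L1, C1) comes strictly before (L2, C2) in source order.
inline bool beforeInSource(unsigned L1, unsigned C1, unsigned L2, unsigned C2) {
  return L1 < L2 || (L1 == L2 && C1 < C2);
}
} // namespace sketch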
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
@@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Iterate over all basic blocks in the function.
for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
const BasicBlock *LLVMBB = &*I;
+#ifndef NDEBUG
+ CheckLineNumbers(LLVMBB);
+#endif
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
@@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
if (FuncInfo->MBB->isLandingPad())
PrepareEHLandingPad();
-
+
// Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end();
+ DBI != DBE; ++DBI) {
+ if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) {
+ const DebugLoc DL = DI->getDebugLoc();
+ SDB->setCurDebugLoc(DL);
+ break;
+ }
+ }
LowerArguments(LLVMBB);
+ }
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
@@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(Inst))
+ if (FastIS->SelectInstruction(Inst)) {
+ // If fast isel succeeded, check to see if there is a single-use
+ // non-volatile load right before the selected instruction, and see if
+ // the load is used by the instruction. If so, try to fold it.
+ const Instruction *BeforeInst = 0;
+ if (Inst != Begin)
+ BeforeInst = llvm::prior(llvm::prior(BI));
+ if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
+ --BI; // If we succeeded, don't re-select the load.
continue;
+ }
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
@@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
delete FastIS;
+#ifndef NDEBUG
+ for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
+ MBI != MBE; ++MBI)
+ CheckLineNumbers(MBI);
+#endif
}
void
@@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() {
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
else
- SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
@@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() {
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
@@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() {
if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
Succs.push_back(SDB->SwitchCases[i].FalseBB);
- // Emit the code. Note that this could result in ThisBB being split, so
- // we need to check for updates.
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
- ThisBB = FuncInfo->MBB;
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
@@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
return Ctor(this, OptLevel);
}
-ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
- return new ScheduleHazardRecognizer();
-}
-
//===----------------------------------------------------------------------===//
// Helper functions used by the generated instruction selector.
//===----------------------------------------------------------------------===//
@@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
- if (InOps[e-1].getValueType() == MVT::Flag)
- --e; // Don't process a flag operand if it is here.
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
@@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
}
}
- // Add the flag input back if present.
+ // Add the glue input back if present.
if (e != InOps.size())
Ops.push_back(InOps.back());
}
-/// findFlagUse - Return use of EVT::Flag value produced by the specified
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
/// SDNode.
///
-static SDNode *findFlagUse(SDNode *N) {
+static SDNode *findGlueUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDUse &Use = I.getUse();
@@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// never find it.
//
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of flag
+ // happen because we scan down to newly selected nodes in the case of glue
// uses.
if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
return false;
-
+
// Don't revisit nodes if we already scanned it and didn't fail, we know we
// won't fail if we scan it again.
if (!Visited.insert(Use))
@@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// Ignore chain uses, they are validated by HandleMergeInputChains.
if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
continue;
-
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
//
// * indicates nodes to be folded together.
//
- // If Root produces a flag, then it gets (even more) interesting. Since it
- // will be "glued" together with its flag use in the scheduler, we need to
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
// check if it might reach N.
//
// [N*] //
@@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// ^ / //
// f / //
// | / //
- // [FU] //
+ // [GU] //
//
- // If FU (flag use) indirectly reaches N (the load), and Root folds N
- // (call it Fold), then X is a predecessor of FU and a successor of
- // Fold. But since Fold and FU are flagged together, this will create
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
// a cycle in the scheduling graph.
- // If the node has flags, walk down the graph to the "lowest" node in the
- // flagged set.
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
- while (VT == MVT::Flag) {
- SDNode *FU = findFlagUse(Root);
- if (FU == NULL)
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
break;
- Root = FU;
+ Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
-
- // If our query node has a flag result with a use, we've walked up it. If
+
+ // If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, our WalkChainUsers predicate will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
-
+
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
@@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
-
+
std::vector<EVT> VTs;
VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
-
+
unsigned Shift = 7;
uint64_t NextBits;
do {
@@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
Val |= (NextBits&127) << Shift;
Shift += 7;
} while (NextBits & 128);
-
+
return Val;
}
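GetVBR above finishes decoding a variable-byte value: each byte contributes 7 payload bits, low bits first, and a set high bit means another byte follows. A standalone decoder over a plain byte table, mirroring that logic (a sketch, not the matcher-table API):

#include <cstdint>

namespace sketch {
// Decode one little-endian base-128 value starting at Table[Idx];
// Idx is advanced past the bytes consumed.
inline uint64_t decodeVBR(const unsigned char *Table, unsigned &Idx) {
  uint64_t Val = Table[Idx++];
  if (Val < 128)                        // single byte, no continuation bit
    return Val;
  Val &= 127;                           // strip the continuation bit
  for (unsigned Shift = 7;; Shift += 7) {
    uint64_t Next = Table[Idx++];
    Val |= (Next & 127) << Shift;
    if (!(Next & 128))
      return Val;
  }
}
// Example: bytes { 0x82, 0x01 } decode to 2 + (1 << 7) = 130.
} // namespace sketch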
-/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
-/// interior flag and chain results to use the new flag and chain results.
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
void SelectionDAGISel::
-UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SDValue InputFlag,
- const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
- bool isMorphNodeTo) {
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
-
+
ISelUpdater ISU(ISelPosition);
// Now that all the normal results are replaced, we replace the chain and
- // flag results if present.
+ // glue results if present.
if (!ChainNodesMatched.empty()) {
assert(InputChain.getNode() != 0 &&
"Matched input chains but didn't produce a chain");
@@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
-
+
// If this node was already deleted, don't look at it.
if (ChainNode->getOpcode() == ISD::DELETED_NODE)
continue;
-
+
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
if (ChainNode == NodeToMatch && isMorphNodeTo)
continue;
-
+
SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
- if (ChainVal.getValueType() == MVT::Flag)
+ if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
-
+
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
-
- // If the result produces a flag, update any flag results in the matched
- // pattern with the flag result.
- if (InputFlag.getNode() != 0) {
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
// Handle any interior nodes explicitly marked.
- for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
- SDNode *FRN = FlagResultNodesMatched[i];
-
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
// If this node was already deleted, don't look at it.
if (FRN->getOpcode() == ISD::DELETED_NODE)
continue;
-
- assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
- "Doesn't have a flag result");
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputFlag, &ISU);
-
+ InputGlue, &ISU);
+
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
NowDeadNodes.push_back(FRN);
}
}
-
+
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
-
+
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1392,17 +1589,17 @@ enum ChainResult {
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
-static ChainResult
+static ChainResult
WalkChainUsers(SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
-
+
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
E = ChainedNode->use_end(); UI != E; ++UI) {
// Make sure the use is of the chain, not some other value we produce.
if (UI.getUse().getValueType() != MVT::Other) continue;
-
+
SDNode *User = *UI;
// If we see an already-selected machine node, then we've gone beyond the
@@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (User->isMachineOpcode() ||
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
-
+
if (User->getOpcode() == ISD::CopyToReg ||
User->getOpcode() == ISD::CopyFromReg ||
User->getOpcode() == ISD::INLINEASM ||
@@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (!std::count(ChainedNodesInPattern.begin(),
ChainedNodesInPattern.end(), User))
return CR_InducesCycle;
-
+
// Otherwise we found a node that is part of our pattern. For example in:
// x = load ptr
// y = x+4
@@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
// If we found a TokenFactor, there are two cases to consider: first if the
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
// uses of the TF are in our pattern) we just want to ignore it. Second,
@@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode,
case CR_LeadsToInteriorNode:
break; // Otherwise, keep processing.
}
-
+
// Okay, we know we're in the interesting interior case. The TokenFactor
// is now going to be considered part of the pattern so that we rewrite its
// uses (it may have uses that are not part of the pattern) with the
@@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
return Result;
}
@@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}
-
+
// Okay, we have walked all the matched nodes and collected TokenFactor nodes
// that we are interested in. Form our input TokenFactor node.
SmallVector<SDValue, 3> InputChains;
@@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
if (N->getOpcode() != ISD::TokenFactor) {
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
continue;
-
+
// Otherwise, add the input chain.
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
assert(InChain.getValueType() == MVT::Other && "Not a chain");
InputChains.push_back(InChain);
continue;
}
-
+
// If we have a token factor, we want to add all inputs of the token factor
// that are not part of the pattern we're matching.
for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
@@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InputChains.push_back(N->getOperand(op));
}
}
-
+
SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
MVT::Other, &InputChains[0], InputChains.size());
-}
+}
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
@@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
- // adding a chain) and the input could have flags and chains as well.
+ // adding a chain) and the input could have glue and chains as well.
// In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
// than the old isel though.
- int OldFlagResultNo = -1, OldChainResultNo = -1;
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
- if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
- OldFlagResultNo = NTMNumResults-1;
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
if (NTMNumResults != 1 &&
Node->getValueType(NTMNumResults-2) == MVT::Other)
OldChainResultNo = NTMNumResults-2;
@@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
unsigned ResNumResults = Res->getNumValues();
- // Move the flag if needed.
- if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
- (unsigned)OldFlagResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults-1));
- if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults-1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node)
CurDAG->ReplaceAllUsesWith(Node, Res);
-
+
return Res;
}
/// CheckSame - Implements OP_CheckSame.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- return N == RecordedNodes[RecNo];
+ return N == RecordedNodes[RecNo].first;
}
-
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (N.getValueType() == VT) return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
}
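CheckType (and CheckValueType below) treat the pattern type iPTR as a wildcard that resolves to the target's pointer type at match time; the comparison amounts to the following, with illustrative names standing in for the MVT enums:

namespace sketch {
constexpr int kIPtr = -1;   // stand-in for the iPTR wildcard

inline bool valueTypeMatches(int NodeVT, int PatternVT, int TargetPointerVT) {
  return NodeVT == PatternVT ||
         (PatternVT == kIPtr && NodeVT == TargetPointerVT);
}
} // namespace sketch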
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI,
unsigned ChildNo) {
@@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C != 0 && C->getSExtValue() == Val;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::AND) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::OR) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
@@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// fail, set Result=true and return anything. If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with. If the current predicate is unknown, set Result=false and return the
-/// MatcherIndex to continue with.
+/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result, SelectionDAGISel &SDISel,
- SmallVectorImpl<SDValue> &RecordedNodes){
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -1782,21 +1980,21 @@ namespace {
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
-
+
/// NodeStack - The node stack when the scope was formed.
SmallVector<SDValue, 4> NodeStack;
-
+
/// NumRecordedNodes - The number of recorded nodes when the scope was formed.
unsigned NumRecordedNodes;
-
+
/// NumMatchedMemRefs - The number of matched memref entries.
unsigned NumMatchedMemRefs;
-
- /// InputChain/InputFlag - The current chain/flag
- SDValue InputChain, InputFlag;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
- bool HasChainNodesMatched, HasFlagResultNodesMatched;
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
}
@@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
}
-
+
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
// Set up the node stack with NodeToMatch as the only node on the stack.
@@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// MatchScopes - Scopes used when matching, if a match failure happens, this
// indicates where to continue checking.
SmallVector<MatchScope, 8> MatchScopes;
-
+
// RecordedNodes - This is the set of nodes that have been recorded by the
- // state machine.
- SmallVector<SDValue, 8> RecordedNodes;
-
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
// MatchedMemRefs - This is the set of MemRef's we've seen in the input
// pattern.
SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
-
- // These are the current input chain and flag for use when generating nodes.
+
+ // These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
- SDValue InputChain, InputFlag;
-
+ SDValue InputChain, InputGlue;
+
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- SmallVector<SDNode*, 3> FlagResultNodesMatched;
-
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
errs() << '\n');
-
+
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
// accelerate the first lookup (which is guaranteed to be hot) with the
// OpcodeOffset table.
unsigned MatcherIndex = 0;
-
+
if (!OpcodeOffset.empty()) {
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
@@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
-
+
while (1) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
@@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// determine immediately that the first check (or first several) will
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
-
+
while (1) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
@@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FailIndex = 0;
break;
}
-
+
FailIndex = MatcherIndex+NumToSkip;
-
+
unsigned MatcherIndexOfPredicate = MatcherIndex;
(void)MatcherIndexOfPredicate; // silence warning.
-
+
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
@@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
Result, *this, RecordedNodes);
if (!Result)
break;
-
+
DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
++NumDAGIselRetries;
-
+
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
MatcherIndex = FailIndex;
}
-
+
// If the whole scope failed to match, bail.
if (FailIndex == 0) break;
-
+
// Push a MatchScope which indicates where to go if the first child fails
// to match.
MatchScope NewEntry;
@@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NewEntry.NumRecordedNodes = RecordedNodes.size();
NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
NewEntry.InputChain = InputChain;
- NewEntry.InputFlag = InputFlag;
+ NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
- NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
- case OPC_RecordNode:
+ case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
- RecordedNodes.push_back(N);
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
continue;
-
+ }
+
case OPC_RecordChild0: case OPC_RecordChild1:
case OPC_RecordChild2: case OPC_RecordChild3:
case OPC_RecordChild4: case OPC_RecordChild5:
@@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
- RecordedNodes.push_back(N->getOperand(ChildNo));
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
continue;
}
case OPC_RecordMemRef:
MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
continue;
-
- case OPC_CaptureFlagInput:
- // If the current node has an input flag, capture it in InputFlag.
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
if (N->getNumOperands() != 0 &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
- InputFlag = N->getOperand(N->getNumOperands()-1);
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
-
+
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
@@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeStack.push_back(N);
continue;
}
-
+
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
- N = NodeStack.back();
+ N = NodeStack.back();
continue;
-
+
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
@@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
- if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
break;
continue;
@@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOpcode:
if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
continue;
-
+
case OPC_CheckType:
if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
continue;
-
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
-
+
case OPC_SwitchType: {
- MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
-
- MVT::SimpleValueType CaseVT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = TLI.getPointerTy().SimpleTy;
-
+ CaseVT = TLI.getPointerTy();
+
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
@@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
-
+
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
@@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeToMatch, OptLevel,
true/*We validate our own chains*/))
break;
-
+
continue;
}
case OPC_EmitInteger: {
@@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
continue;
}
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
unsigned RegNo = MatcherTable[MatcherIndex++];
- RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
continue;
}
-
+
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Imm = RecordedNodes[RecNo];
+ SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
@@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
}
-
- RecordedNodes.push_back(Imm);
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
continue;
}
-
+
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
// These are space-optimized forms of OPC_EmitMergeInputChains.
@@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
-
+
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
-
+
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitMergeInputChains: {
assert(InputChain.getNode() == 0 &&
"EmitMergeInputChains should be the first chain producing node");
@@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
}
-
+
// If the inner loop broke out, the match fails.
if (ChainNodesMatched.empty())
break;
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
-
+
if (InputChain.getNode() == 0)
InputChain = CurDAG->getEntryNode();
-
+
InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
- DestPhysReg, RecordedNodes[RecNo],
- InputFlag);
-
- InputFlag = InputChain.getValue(1);
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
continue;
}
-
+
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
}
-
+
case OPC_EmitNode:
case OPC_MorphNodeTo: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
@@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
VTs.push_back(VT);
}
-
+
if (EmitNodeInfo & OPFL_Chain)
VTs.push_back(MVT::Other);
- if (EmitNodeInfo & OPFL_FlagOutput)
- VTs.push_back(MVT::Flag);
-
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
// This is hot code, so optimize the two most common cases of 1 and 2
// results.
SDVTList VTList;
@@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-
+
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
- Ops.push_back(RecordedNodes[RecNo]);
+ Ops.push_back(RecordedNodes[RecNo].first);
}
-
+
// If there are variadic operands to add, handle them now.
if (EmitNodeInfo & OPFL_VariadicInfo) {
// Determine the start index to copy from.
@@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
"Invalid variadic node");
- // Copy all of the variadic operands, not including a potential flag
+ // Copy all of the variadic operands, not including a potential glue
// input.
for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
i != e; ++i) {
SDValue V = NodeToMatch->getOperand(i);
- if (V.getValueType() == MVT::Flag) break;
+ if (V.getValueType() == MVT::Glue) break;
Ops.push_back(V);
}
}
-
- // If this has chain/flag inputs, add them.
+
+ // If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
- if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
- Ops.push_back(InputFlag);
-
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
// Create the node.
SDNode *Res = 0;
if (Opcode != OPC_MorphNodeTo) {
@@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
VTList, Ops.data(), Ops.size());
-
- // Add all the non-flag/non-chain results to the RecordedNodes list.
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
- if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
- RecordedNodes.push_back(SDValue(Res, i));
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
}
-
+
} else {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
}
-
- // If the node had chain/flag results, update our notion of the current
- // chain and flag.
- if (EmitNodeInfo & OPFL_FlagOutput) {
- InputFlag = SDValue(Res, VTs.size()-1);
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-2);
} else if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-1);
- // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
// accumulated memrefs onto it.
//
// FIXME: This is vastly incorrect for patterns with multiple outputs
@@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
cast<MachineSDNode>(Res)
->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
}
-
+
DEBUG(errs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
<< " node: "; Res->dump(CurDAG); errs() << "\n");
-
+
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, true);
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
return Res;
}
-
+
continue;
}
-
- case OPC_MarkFlagResults: {
+
+ case OPC_MarkGlueResults: {
unsigned NumNodes = MatcherTable[MatcherIndex++];
-
- // Read and remember all the flag-result nodes.
+
+ // Read and remember all the glue-result nodes.
for (unsigned i = 0; i != NumNodes; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
}
-
+
case OPC_CompleteMatch: {
// The match has been completed, and any new nodes (if any) have been
// created. Patch up references to the matched dag to use the newly
@@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned ResSlot = MatcherTable[MatcherIndex++];
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-
+
assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Res = RecordedNodes[ResSlot];
-
+ SDValue Res = RecordedNodes[ResSlot].first;
+
assert(i < NodeToMatch->getNumValues() &&
NodeToMatch->getValueType(i) != MVT::Other &&
- NodeToMatch->getValueType(i) != MVT::Flag &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
"Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
@@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
- // If the root node defines a flag, add it to the flag nodes to update
- // list.
- if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
- FlagResultNodesMatched.push_back(NodeToMatch);
-
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, false);
-
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
-
+
// FIXME: We just return here, which interacts correctly with SelectRoot
// above. We should fix this to not return an SDNode* anymore.
return 0;
}
}
-
+
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
@@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
-
+
DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
-
+
InputChain = LastScope.InputChain;
- InputFlag = LastScope.InputFlag;
+ InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
- if (!LastScope.HasFlagResultNodesMatched)
- FlagResultNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
@@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
LastScope.FailIndex = MatcherIndex+NumToSkip;
break;
}
-
+
// End of this scope, pop it and try the next child in the containing
// scope.
MatchScopes.pop_back();
}
}
}
-
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Cannot yet select: ";
-
+ Msg << "Cannot select: ";
+
if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
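Editor's note: the recurring change in the SelectionDAGISel hunks above is that RecordedNodes entries become a pair of the matched value and the node in whose operand list it was found, so later checks (e.g. CheckComplexPattern) can see the use site as well as the value. The following is a standalone sketch of that shape only, using hypothetical stand-in types rather than the real SDValue/SDNode classes.

    // Standalone sketch (hypothetical stand-in types, not LLVM code) of the
    // RecordedNodes change: each recorded value now carries a "parent" node.
    #include <utility>
    #include <vector>

    struct FakeNode { int id; };                              // stand-in for SDNode
    struct FakeValue { FakeNode *producer; unsigned resNo; }; // stand-in for SDValue

    int main() {
      std::vector<std::pair<FakeValue, FakeNode*> > recorded;

      FakeNode def = {1};      // node that defines the value
      FakeNode user = {2};     // node in whose operand list the value was found
      FakeValue v = {&def, 0};

      // OPC_RecordNode / OPC_RecordChildN remember the use site too ...
      recorded.push_back(std::make_pair(v, &user));
      // ... while matcher-synthesized values (OPC_EmitInteger, OPC_EmitRegister,
      // OPC_EmitNodeXForm results) record a null parent, as in the hunks above.
      recorded.push_back(std::make_pair(v, (FakeNode*)0));

      return recorded.size() == 2 ? 0 : 1;
    }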
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 8313de5..76eb945 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -93,7 +93,7 @@ namespace llvm {
static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
EVT VT = Op.getValueType();
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return "color=red,style=bold";
else if (VT == MVT::Other)
return "color=blue,style=dashed";
@@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
raw_string_ostream O(s);
O << "SU(" << SU->NodeNum << "): ";
if (SU->getNode()) {
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
- FlaggedNodes.pop_back();
- if (!FlaggedNodes.empty())
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
O << "\n ";
}
} else {
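Editor's note: the printer hunk above only renames FlaggedNodes to GluedNodes, but the loop structure it keeps is worth seeing in isolation: nodes linked by glue are collected by repeatedly following the glue link, then emitted back-to-front. A minimal sketch with a hypothetical linked-node type (not the real SDNode API):

    // Sketch of the collect-then-print-in-reverse pattern used by
    // getGraphNodeLabel above; Node/glued are hypothetical stand-ins.
    #include <iostream>
    #include <vector>

    struct Node { int id; Node *glued; };

    int main() {
      Node c = {3, 0}, b = {2, &c}, a = {1, &b};

      std::vector<Node*> gluedNodes;
      for (Node *n = &a; n; n = n->glued)   // follow the glue link
        gluedNodes.push_back(n);

      while (!gluedNodes.empty()) {         // print last-collected first
        std::cout << "node " << gluedNodes.back()->id;
        gluedNodes.pop_back();
        if (!gluedNodes.empty())
          std::cout << "\n  ";
      }
      std::cout << "\n";
      return 0;
    }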
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b74f600..691390e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cctype>
using namespace llvm;
namespace llvm {
@@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
-
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
+
+ // ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
@@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
+
IsLittleEndian = TD->isLittleEndian();
ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
IntDivIsCheap = false;
Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
@@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
}
NumIntermediates = NumVectorRegs;
-
+
MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
if (!TLI->isTypeLegal(NewVT))
NewVT = EltTy;
@@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
@@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() {
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
- }
+ }
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
@@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::f32, Expand);
}
}
-
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
-
+
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
EVT EltVT = VT.getVectorElementType();
@@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() {
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeSynthesizable(SVT)) {
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() {
}
if (IsLegalWiderType) continue;
}
-
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() {
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
-
+
EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
@@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
unsigned NumElts = VT.getVectorNumElements();
-
+
// If there is a wider vector type with the same element type as this one,
// we should widen to that legal vector type. This handles things like
// <2 x float> -> <4 x float>.
@@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
return 1;
}
}
-
+
// Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !isTypeLegal(
@@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
}
NumIntermediates = NumVectorRegs;
-
+
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
@@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
RegisterVT = DestVT;
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
}
-/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
@@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return MachineJumpTableInfo::EK_BlockAddress;
-
+
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
-
+
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
@@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
DebugLoc dl = Op.getDebugLoc();
@@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
EVT VT = Op.getValueType();
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
- C->getAPIntValue(),
+ C->getAPIntValue(),
VT));
return CombineTo(Op, New);
}
@@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse()) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
return false;
@@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is the root being simplified, allow it to have multiple uses,
// just set the NewMask to all bits.
NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
+ } else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (Op.getOpcode() != ISD::UNDEF)
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
@@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth+1);
+ LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
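Editor's note: the hunk above keeps the depth of the ComputeMaskedBits query unchanged (incrementing it could recurse forever) and relies on the existing rule that an AND with constant C is dead when the LHS is already known zero in every bit C clears, over the demanded bits. A small numeric illustration with plain integers (not APInt):

    // Plain-integer illustration of the "this and is dead" check above.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t demanded  = 0xFFFFFFFFu;  // NewMask: all bits demanded
      uint32_t knownZero = 0xFFFFFF00u;  // LHS known zero-extended from 8 bits
      uint32_t C         = 0x000000FFu;  // the AND constant

      // (LHSZero & NewMask) == (~C & NewMask)  ==>  the AND can be dropped.
      assert((knownZero & demanded) == (~C & demanded));

      uint32_t lhs = 0x0000005Au;        // any value consistent with knownZero
      assert((lhs & C) == lhs);          // the AND really is a no-op here
      return 0;
    }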
@@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
}
-
+
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
@@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= KnownZero2;
break;
case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
@@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne |= KnownOne2;
break;
case ISD::XOR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((KnownZero & NewMask) == NewMask)
@@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
Op.getOperand(0),
Op.getOperand(1)));
-
+
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-
+
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
@@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownOne & KnownOne2) == KnownOne) {
EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
}
-
+
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// if we can't force bits, try to shrink constant
@@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne = KnownOneOut;
break;
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
@@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
- }
-
- SDValue NewSA =
+ }
+
+ SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
@@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
unsigned ShAmt = SA->getZExtValue();
unsigned VTSize = VT.getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
- }
-
+ }
+
SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
-
+
// Handle the sign bit, adjusted to where it is now in the mask.
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
-
+
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
} else if (KnownOne.intersects(SignBit)) { // New bits are known one.
@@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- // Sign extension. Compute the demanded bits in the result that are not
+ // Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
BitWidth - EVT.getScalarType().getSizeInBits());
-
+
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
- InSignBit.zext(BitWidth);
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
EVT.getScalarType().getSizeInBits()) &
NewMask;
-
+
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits |= InSignBit;
@@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
-
+
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
+ return TLO.CombineTo(Op,
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
-
+
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
@@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
-
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
// If none of the top bits are demanded, convert this into an any_extend.
APInt NewBits =
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
if (!NewBits.intersects(NewMask))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
break;
}
@@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
-
+
// If none of the top bits are demanded, convert this into an any_extend.
if (NewBits == 0)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
+
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
APInt InDemandedBits = InMask & NewMask;
InDemandedBits |= InSignBit;
- InDemandedBits.trunc(InBits);
-
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
// If the sign bit is known zero, convert this to a zero extend.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
KnownOne |= NewBits;
@@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// zero/one bits live out.
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt TruncMask = NewMask;
- TruncMask.zext(OperandBitWidth);
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
-
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
@@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue());
- HighBits.trunc(BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
- NewTrunc,
+ NewTrunc,
In.getOperand(1)));
}
break;
}
}
-
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
@@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth,
@@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= ~InMask & NewMask;
break;
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
#if 0
// If this is an FP->Int bitcast and if the sign bit is the only thing that
// is demanded, turn this into a FGETSIGN.
@@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
Op.getOperand(0));
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
@@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
break;
}
-
+
// If we know the value of all of the demanded bits, return this as a
// constant.
if ((NewMask & (KnownZero|KnownOne)) == NewMask)
return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
-
+
return false;
}
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
@@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
(KnownOne.countPopulation() == 1);
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
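Editor's note: the ctpop combine added in the hunk above rests on the identity stated in its comment, "(ctpop x) u< 2 -> (x & x-1) == 0": clearing the lowest set bit of x yields zero exactly when at most one bit was set. A self-contained check of that identity in plain C++ (independent of the DAG code):

    // Verify popcount(x) < 2  <=>  (x & (x - 1)) == 0 on a few sample values.
    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t x) {
      unsigned n = 0;
      while (x) { x &= x - 1; ++n; }   // clear the lowest set bit each round
      return n;
    }

    int main() {
      const uint32_t samples[] = {0u, 1u, 2u, 3u, 0x80u, 0x81u,
                                  0x80000000u, 0xFFFFFFFFu};
      for (unsigned i = 0; i < sizeof(samples)/sizeof(samples[0]); ++i) {
        uint32_t x = samples[i];
        bool viaCtpop = popcount32(x) < 2;       // (ctpop x) u< 2
        bool viaTrick = (x & (x - 1)) == 0;      // (x & x-1) == 0
        assert(viaCtpop == viaTrick);
      }
      return 0;
    }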
@@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!Lod->isVolatile() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueType().getSizeInBits();
unsigned maskWidth = origWidth;
- // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
@@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(bestOffset, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getSrcValue(),
- Lod->getSrcValueOffset() + bestOffset,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
false, false, NewAlign);
- return DAG.getSetCC(dl, VT,
+ return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
newVT)),
@@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT)==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
break;
}
@@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
-
+
SDValue ZextOp;
EVT Op0Ty = N0.getOperand(0).getValueType();
if (Op0Ty == ExtSrcTy) {
@@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
- return DAG.getSetCC(dl, VT, ZextOp,
+ return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & APInt::getLowBitsSet(
ExtDstTyBits,
- ExtSrcTyBits),
+ ExtSrcTyBits),
ExtDstTy),
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
@@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
+ CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::XOR)
Val = N0.getOperand(0);
else {
- assert(N0.getOpcode() == ISD::AND &&
+ assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
@@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
-
+
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
@@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
// X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1-1, N1.getValueType()),
(Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
}
@@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
// X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1+1, N1.getValueType()),
(Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
}
@@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
@@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// by changing cc.
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
+ if (Cond == ISD::SETUGT &&
C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, N1.getValueType()),
ISD::SETLT);
@@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getUNDEF(VT);
}
}
-
+
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
@@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
- return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
-
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LHSR->getAPIntValue(),
N0.getValueType()), Cond);
}
-
+
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
@@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
-
+
// Turn (C1-X) == C2 --> X == C1-C2
if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
@@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
- }
+ }
}
// Simplify (X+Z) == X --> Z == 0
@@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ N1,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
}
}
}
+
return false;
}
@@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
case 's': // Relocatable Constant
+ case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'N':
case 'O':
case 'P':
+ case '<':
+ case '>':
return C_Other;
}
}
-
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
Constraint[Constraint.size()-1] == '}')
return C_Register;
return C_Unknown;
@@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// is possible and fine if either GV or C are missing.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
+
// If we have "(add GV, C)", pull out GV/C
if (Op.getOpcode() == ISD::ADD) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (C == 0 || GA == 0)
C = 0, GA = 0;
}
-
+
// If we find a valid operand, map to the TargetXXX version so that the
// value itself doesn't get selected.
if (GA) { // Either &GV or &GV+C
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
@@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
-
- // If none of the value types for this register class are valid, we
+
+ // If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
bool isLegal = false;
for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
@@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
break;
}
}
-
+
if (!isLegal) continue;
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
-
+
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
@@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
}
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
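The multiple-alternative loop in ParseConstraints above sums per-operand weights for each alternative index and keeps the alternative with the largest total, treating any -1 weight as "this alternative is impossible". A self-contained sketch of that selection idea, detached from the LLVM types (all names below are invented for illustration):

#include <vector>

// weights[op][alt] is the weight operand 'op' reports for alternative 'alt';
// a negative weight means that operand cannot be satisfied by that alternative.
static unsigned pickBestAlternative(const std::vector<std::vector<int> > &weights,
                                    unsigned numAlternatives) {
  int bestSum = -1;
  unsigned bestIndex = 0;
  for (unsigned a = 0; a != numAlternatives; ++a) {
    int sum = 0;
    for (unsigned op = 0; op != weights.size(); ++op) {
      int w = weights[op][a];
      if (w < 0) { sum = -1; break; }   // one impossible operand rules out 'a'
      sum += w;
    }
    if (sum > bestSum) { bestSum = sum; bestIndex = a; }
  }
  return bestIndex;
}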
/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
@@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
}
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
@@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
break;
}
}
-
+
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
-
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
@@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
BestGenerality = Generality;
}
}
-
+
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
@@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
- SDValue Op,
+ SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
-
+
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
@@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
-
+
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
@@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.CallOperandVal = v;
return;
}
-
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
@@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
-
+
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
-
- // Only support r+r,
+
+ // Only support r+r,
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
@@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allow 2*r as r+r.
break;
}
-
+
return true;
}
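The default isLegalAddressingMode above accepts r, r+i with a signed 16-bit offset, r+r, and 2*r. A standalone sketch of the same acceptance test; the AddrMode mirror is a made-up stand-in so the snippet compiles outside LLVM, and the bodies of the Scale == 1 and Scale == 2 cases (not shown in the hunk) are reconstructed from the stock default of this era:

#include <cstdint>

// Simplified stand-in for TargetLowering::AddrMode, for illustration only.
struct AddrMode {
  int64_t BaseOffs;   // constant offset
  bool HasBaseReg;    // is a base register present?
  int64_t Scale;      // scale applied to the index register
  bool HasBaseGV;     // is a global value used as the base?
};

// The conservative default: r, r+i with a signed 16-bit offset, r+r, or 2*r.
static bool isLegalRISCAddrMode(const AddrMode &AM) {
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;                       // offset must fit a signed 16-bit field
  if (AM.HasBaseGV)
    return false;                       // no global is ever allowed as a base
  switch (AM.Scale) {
  case 0:  return true;                            // "r+i" or just "i"
  case 1:  return !(AM.HasBaseReg && AM.BaseOffs); // r+r or r+i, not r+r+i
  case 2:  return !AM.HasBaseReg && !AM.BaseOffs;  // allow 2*r as r+r
  default: return false;
  }
}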
@@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
-
+
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
-
+
APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
APInt::ms magics = d.magic();
-
+
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
@@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
else
return SDValue(); // No mulhs or equivalent
// If d > 0 and m < 0, add the numerator
- if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
@@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
- Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
if (Created)
Created->push_back(Q.getNode());
@@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
if (magics.a == 0) {
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
"We shouldn't generate an undefined shift!");
- return DAG.getNode(ISD::SRL, dl, VT, Q,
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
if (Created)
Created->push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, getShiftAmountTy()));
if (Created)
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
if (Created)
Created->push_back(NPQ.getNode());
- return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s-1, getShiftAmountTy()));
}
}
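BuildSDIV and BuildUDIV above replace division by a constant with a multiply-high by a "magic" number followed by shifts (the Hacker's Delight technique behind d.magic() and d.magicu()). A small self-contained check of the unsigned case for d == 3, where the 32-bit magic constant 0xAAAAAAAB and a shift of 1 are well known; the constant is supplied here by the editor for illustration and is not taken from the patch:

#include <cassert>
#include <cstdint>

// n / 3 computed as mulhu(n, 0xAAAAAAAB) >> 1, i.e. the pattern BuildUDIV
// emits when magics.a == 0: a MULHU followed by an SRL by magics.s.
static uint32_t udiv3(uint32_t n) {
  uint64_t wide = (uint64_t)n * 0xAAAAAAABu;  // 32x32->64 multiply
  uint32_t hi = (uint32_t)(wide >> 32);       // MULHU
  return hi >> 1;                             // SRL by magics.s (== 1 for d = 3)
}

int main() {
  for (uint32_t n = 0; n < 1000000; ++n)
    assert(udiv3(n) == n / 3);
  return 0;
}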
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
index aeaa38b..7b5bca4 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -226,7 +226,7 @@ bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
// AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
CSRegSet prevAnticIn = AnticIn[MBB];
AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
- if (prevAnticIn |= AnticIn[MBB])
+ if (prevAnticIn != AnticIn[MBB])
changed = true;
return changed;
}
@@ -264,7 +264,7 @@ bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
// AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
CSRegSet prevAvailOut = AvailOut[MBB];
AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
- if (prevAvailOut |= AvailOut[MBB])
+ if (prevAvailOut != AvailOut[MBB])
changed = true;
return changed;
}
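The two ShrinkWrapping hunks above fix a change-detection bug: the old code used |= inside the if condition, so the test reflected whatever operator|= returned rather than whether the set actually changed. The intended fixed-point update pattern, sketched with a generic set type (names invented for illustration):

#include <set>

typedef std::set<unsigned> RegSet;

// One dataflow update step: In = Used U Out, reporting whether In changed.
static bool updateAnticIn(RegSet &In, const RegSet &Used, const RegSet &Out) {
  RegSet prev = In;            // snapshot before the update
  In = Used;
  In.insert(Out.begin(), Out.end());
  return prev != In;           // compare, do not accumulate with |=
}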
diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b29ea19..2843c1a 100644
--- a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "regcoalescing"
#include "SimpleRegisterCoalescing.h"
#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -64,9 +65,25 @@ DisablePhysicalJoin("disable-physical-join",
cl::desc("Avoid coalescing physical register copies"),
cl::init(false), cl::Hidden);
-INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer,
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer,
"simple-register-coalescing", "Simple Register Coalescing",
- false, false, true);
+ false, false, true)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer,
+ "simple-register-coalescing", "Simple Register Coalescing",
+ false, false, true)
char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
@@ -75,14 +92,14 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
AU.addPreservedID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -124,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->getCopy()) return false;
+ if (!BValNo->isDefByCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
@@ -218,7 +235,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
continue;
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0, true,
+ SRLI.getNextValue(FillerStart, 0,
li_->getVNInfoAllocator())));
}
}
@@ -266,9 +283,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
if (BI->valno == BValNo)
continue;
- // When BValNo is null, we're looking for a dummy clobber-value for a subreg.
- if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy())
- continue;
if (BI->start <= AI->start && BI->end > AI->start)
return true;
if (BI->start > AI->start && BI->start < AI->end)
@@ -278,16 +292,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-static void
-TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
- for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
- i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- }
-}
-
/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
@@ -324,8 +328,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!li_->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
LiveInterval &IntA =
li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -334,27 +337,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
- LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- if (BLR == IntB.end()) return false;
- VNInfo *BValNo = BLR->valno;
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
- // Get the location that B is defined at. Two options: either this value has
- // an unknown definition point or it is defined at CopyIdx. If unknown, we
- // can't process it.
- if (!BValNo->getCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- LiveInterval::iterator ALR =
- IntA.FindLiveRangeContaining(CopyIdx.getUseIndex()); //
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
- assert(ALR != IntA.end() && "Live range not found!");
- VNInfo *AValNo = ALR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
- // tested?
- if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
- AValNo->isUnused() || AValNo->hasPHIKill())
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -411,7 +406,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
}
- DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
@@ -427,10 +423,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
NewMI->getOperand(OpIdx).setIsKill();
- bool BHasPHIKill = BValNo->hasPHIKill();
- SmallVector<VNInfo*, 4> BDeadValNos;
- std::map<SlotIndex, SlotIndex> BExtend;
-
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
// A = or A, B
// ...
@@ -439,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// C = A<kill>
// ...
// = B
- bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
- if (Extended)
- BExtend[ALR->end] = BLR->end;
// Update uses of IntA of the specific Val# with IntB.
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
@@ -467,52 +456,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
if (UseMI == CopyMI)
continue;
- if (UseMO.isKill()) {
- if (Extended)
- UseMO.setIsKill(false);
- }
if (!UseMI->isCopy())
continue;
if (UseMI->getOperand(0).getReg() != IntB.reg ||
UseMI->getOperand(0).getSubReg())
continue;
- // This copy will become a noop. If it's defining a new val#,
- // remove that val# as well. However this live range is being
- // extended to the end of the existing live range defined by the copy.
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
SlotIndex DefIdx = UseIdx.getDefIndex();
- const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- if (!DLR)
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
continue;
- BHasPHIKill |= DLR->valno->hasPHIKill();
- assert(DLR->valno->def == DefIdx);
- BDeadValNos.push_back(DLR->valno);
- BExtend[DLR->start] = DLR->end;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
JoinedCopies.insert(UseMI);
}
- // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
- // simply extend BLR if CopyMI doesn't end the range.
- DEBUG({
- dbgs() << "Extending: ";
- IntB.print(dbgs(), tri_);
- });
-
- // Remove val#'s defined by copies that will be coalesced away.
- for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
- VNInfo *DeadVNI = BDeadValNos[i];
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
- if (!li_->hasInterval(*AS))
- continue;
- LiveInterval &ASLI = li_->getInterval(*AS);
- if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
- ASLI.removeValNo(ASLR->valno);
- }
- }
- IntB.removeValNo(BDeadValNos[i]);
- }
-
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
// is updated.
VNInfo *ValNo = BValNo;
@@ -521,30 +482,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- SlotIndex End = AI->end;
- std::map<SlotIndex, SlotIndex>::iterator
- EI = BExtend.find(End);
- if (EI != BExtend.end())
- End = EI->second;
- IntB.addRange(LiveRange(AI->start, End, ValNo));
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
}
- ValNo->setHasPHIKill(BHasPHIKill);
-
- DEBUG({
- dbgs() << " result = ";
- IntB.print(dbgs(), tri_);
- dbgs() << "\nShortening: ";
- IntA.print(dbgs(), tri_);
- });
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
IntA.removeValNo(AValNo);
-
- DEBUG({
- dbgs() << " result = ";
- IntA.print(dbgs(), tri_);
- dbgs() << '\n';
- });
-
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
return true;
}
@@ -644,6 +587,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
unsigned DstReg,
unsigned DstSubIdx,
MachineInstr *CopyMI) {
@@ -652,12 +596,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
- // tested?
- if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
- ValNo->isUnused() || ValNo->hasPHIKill())
+ // the optimization.
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
assert(DefMI && "Defining instruction disappeared");
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isAsCheapAsAMove())
@@ -681,8 +625,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- // If destination register has a sub-register index on it, make sure it mtches
- // the instruction register class.
+ // If destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
if (DstSubIdx) {
const TargetInstrDesc &TID = DefMI->getDesc();
if (TID.getNumDefs() != 1)
@@ -699,30 +643,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
RemoveCopyFlag(DstReg, CopyMI);
- // If copy kills the source register, find the last use and propagate
- // kill.
- bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (SrcLR->end == CopyIdx.getDefIndex())
- if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
- checkForDeadDef = true;
- }
-
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
MachineInstr *NewMI = prior(MII);
- if (checkForDeadDef) {
- // PR4090 fix: Trim interval failed because there was no use of the
- // source interval in this MBB. If the def is in this MBB too then we
- // should mark it dead:
- if (DefMI->getParent() == MBB) {
- DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start.getNextSlot();
- }
- }
-
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
@@ -734,13 +660,18 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
RemoveCopyFlag(MO.getReg(), CopyMI);
}
- TransferImplicitOps(CopyMI, NewMI);
+ NewMI->copyImplicitOps(CopyMI);
li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
return true;
}
@@ -756,6 +687,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
unsigned DstReg = CP.getDstReg();
unsigned SubIdx = CP.getSubIdx();
+ // Update LiveDebugVariables.
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
// A PhysReg copy that won't be coalesced can perhaps be rematerialized
@@ -768,7 +702,7 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
UseMI->getOperand(0).getReg() != SrcReg &&
UseMI->getOperand(0).getReg() != DstReg &&
!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
UseMI->getOperand(0).getReg(), 0, UseMI))
continue;
}
@@ -874,7 +808,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
if (li_->hasInterval(DstReg)) {
LiveInterval &LI = li_->getInterval(DstReg);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->getCopy() == CopyMI)
+ if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
}
if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
@@ -884,7 +818,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
continue;
LiveInterval &LI = li_->getInterval(*AS);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->getCopy() == CopyMI)
+ if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
}
}
@@ -1044,23 +978,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false;
}
- DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_));
// Enforce policies.
if (CP.isPhys()) {
- DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
+ DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n");
// Only coalesce to allocatable physreg.
if (!li_->isAllocatable(CP.getDstReg())) {
DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
return false; // Not coalescable.
}
} else {
- DEBUG({
- dbgs() << " with reg%" << CP.getDstReg();
- if (CP.getSubIdx())
- dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
- dbgs() << " to " << CP.getNewRC()->getName() << "\n";
- });
+ DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ << " to " << CP.getNewRC()->getName() << "\n");
// Avoid constraining virtual register regclass too much.
if (CP.isCrossClass()) {
@@ -1114,7 +1044,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+ ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI))
return true;
++numAborts;
@@ -1134,7 +1064,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
CP.getDstReg(), 0, CopyMI))
return true;
@@ -1317,7 +1247,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
continue;
// Never join with a register that has EarlyClobber redefs.
@@ -1341,7 +1271,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
continue;
// Never join with a register that has EarlyClobber redefs.
@@ -1495,9 +1425,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
std::vector<CopyRec> &TryAgain) {
DEBUG(dbgs() << MBB->getName() << ":\n");
- std::vector<CopyRec> VirtCopies;
- std::vector<CopyRec> PhysCopies;
- std::vector<CopyRec> ImpDefCopies;
+ SmallVector<CopyRec, 8> VirtCopies;
+ SmallVector<CopyRec, 8> PhysCopies;
+ SmallVector<CopyRec, 8> ImpDefCopies;
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E;) {
MachineInstr *Inst = MII++;
@@ -1690,6 +1620,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
li_ = &getAnalysis<LiveIntervals>();
+ ldv_ = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
loopInfo = &getAnalysis<MachineLoopInfo>();
@@ -1697,6 +1628,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
+ if (VerifyCoalescing)
+ mf_->verify(this, "Before register coalescing");
+
for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
E = tri_->regclass_end(); I != E; ++I)
allocatableRCRegs_.insert(std::make_pair(*I,
@@ -1739,9 +1673,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
DoDelete = false;
if (MI->allDefsAreDead()) {
- LiveInterval &li = li_->getInterval(SrcReg);
- if (!ShortenDeadCopySrcLiveRange(li, MI))
- ShortenDeadCopyLiveRange(li, MI);
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &li = li_->getInterval(SrcReg);
+ if (!ShortenDeadCopySrcLiveRange(li, MI))
+ ShortenDeadCopyLiveRange(li, MI);
+ }
DoDelete = true;
}
if (!DoDelete) {
@@ -1821,13 +1757,26 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
if (!MO.isReg() || !MO.isKill()) continue;
unsigned reg = MO.getReg();
if (!reg || !li_->hasInterval(reg)) continue;
- if (!li_->getInterval(reg).killedAt(DefIdx))
+ if (!li_->getInterval(reg).killedAt(DefIdx)) {
MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = tri_->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, tri_);
}
}
}
DEBUG(dump());
+ DEBUG(ldv_->dump());
+ if (VerifyCoalescing)
+ mf_->verify(this, "After register coalescing");
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h
index 855bdb9..56703df 100644
--- a/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/contrib/llvm/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -21,7 +21,7 @@
namespace llvm {
class SimpleRegisterCoalescing;
- class LiveVariables;
+ class LiveDebugVariables;
class TargetRegisterInfo;
class TargetInstrInfo;
class VirtRegMap;
@@ -44,6 +44,7 @@ namespace llvm {
const TargetRegisterInfo* tri_;
const TargetInstrInfo* tii_;
LiveIntervals *li_;
+ LiveDebugVariables *ldv_;
const MachineLoopInfo* loopInfo;
AliasAnalysis *AA;
@@ -63,7 +64,9 @@ namespace llvm {
public:
static char ID; // Pass identification, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
+ SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
+ initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
+ }
struct InstrSlots {
enum {
@@ -140,8 +143,10 @@ namespace llvm {
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- unsigned DstSubIdx, MachineInstr *CopyMI);
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, unsigned DstSubIdx,
+ MachineInstr *CopyMI);
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index b637980..13e1454 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -21,15 +21,14 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
@@ -53,6 +52,7 @@ namespace {
Constant *SelectorFn;
Constant *ExceptionFn;
Constant *CallSiteFn;
+ Constant *DispatchSetupFn;
Value *CallSite;
public:
@@ -116,6 +116,8 @@ bool SjLjEHPass::doInitialization(Module &M) {
SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ DispatchSetupFn
+ = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
PersonalityFn = 0;
return true;
@@ -317,8 +319,12 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Unwinds.push_back(UI);
}
}
- // If we don't have any invokes or unwinds, there's nothing to do.
- if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // If we don't have any invokes, there's nothing to do.
+ if (Invokes.empty()) return false;
// Find the eh.selector.*, eh.exception and alloca calls.
//
@@ -332,6 +338,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
SmallVector<CallInst*,16> EH_Selectors;
SmallVector<CallInst*,16> EH_Exceptions;
SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
// Note: Skip the entry block since there's nothing there that interests
// us. eh.selector and eh.exception shouldn't ever be there, and we
// want to disregard any allocas that are there.
@@ -351,228 +358,231 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
}
}
}
+
// If we don't have any eh.selector calls, we can't determine the personality
// function. Without a personality function, we can't process exceptions.
if (!PersonalityFn) return false;
- NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
+ // We have invokes, so we need to add register/unregister calls to get this
+ // function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge we
+ // spill into a stack location, guaranteeing that there is nothing live across
+ // the unwind edge. This process also splits all critical edges coming out of
+ // invoke's.
+ splitLiveRangesAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a reference to
+ // the selector value returned in the function context. We leave the selector
+ // itself so the EH analysis later can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
- if (!Invokes.empty()) {
- // We have invokes, so we need to add register/unregister calls to get
- // this function onto the global unwind stack.
- //
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge
- // we spill into a stack location, guaranteeing that there is nothing live
- // across the unwind edge. This process also splits all critical edges
- // coming out of invoke's.
- splitLiveRangesAcrossInvokes(Invokes);
-
- BasicBlock *EntryBB = F.begin();
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be added to the global context list.
- unsigned Align = 4; // FIXME: Should be a TLI check?
- AllocaInst *FunctionContext =
- new AllocaInst(FunctionContextTy, 0, Align,
- "fcn_context", F.begin()->begin());
-
- Value *Idxs[2];
- const Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- // We need to also keep around a reference to the call_site field
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "call_site",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->data[1]
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "fc_data",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
- "exc_selector_gep",
- EntryBB->getTerminator());
- // The exception value comes back in context->data[0]
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The result of the eh.selector call will be replaced with a
- // a reference to the selector value returned in the function
- // context. We leave the selector itself so the EH analysis later
- // can use it.
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
- CallInst *I = EH_Selectors[i];
- Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
- I->replaceAllUsesWith(SelectorVal);
- }
- // eh.exception calls are replaced with references to the proper
- // location in the context. Unlike eh.selector, the eh.exception
- // calls are removed entirely.
- for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
- CallInst *I = EH_Exceptions[i];
- // Possible for there to be duplicates, so check to make sure
- // the instruction hasn't already been removed.
- if (!I->getParent()) continue;
- Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- const Type *Ty = Type::getInt8PtrTy(F.getContext());
- Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
- I->replaceAllUsesWith(Val);
- I->eraseFromParent();
- }
+ // eh.exception calls are replaced with references to the proper location in
+ // the context. Unlike eh.selector, the eh.exception calls are removed
+ // entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure the
+ // instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
- // The entry block changes to have the eh.sjlj.setjmp, with a conditional
- // branch to a dispatch block for non-zero returns. If we return normally,
- // we're not handling an exception and just register the function context
- // and continue.
-
- // Create the dispatch block. The dispatch block is basically a big switch
- // statement that goes to all of the invoke landing pads.
- BasicBlock *DispatchBlock =
- BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
- // Insert a load in the Catch block, and a switch on its value. By default,
- // we go to a block that just does an unwind (which is the correct action
- // for a standard call).
- BasicBlock *UnwindBlock =
- BasicBlock::Create(F.getContext(), "unwindbb", &F);
- Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
-
- Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
- DispatchBlock);
- SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
- DispatchBlock);
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "eh.sjlj.setjmp.cont");
-
- // Populate the Function Context
- // 1. LSDA address
- // 2. Personality function address
- // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
- // LSDA address
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
- EntryBB->getTerminator());
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context and
+ // continue.
+
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Add a call to dispatch_setup at the start of the dispatch block. This is
+ // expanded to any target-specific setup that needs to be done.
+ Value *SetupArg =
+ CastInst::Create(Instruction::BitCast, FunctionContext,
+ Type::getInt8PtrTy(F.getContext()), "",
+ DispatchBlock);
+ CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock);
+
+ // Insert a load of the callsite in the dispatch block, and a switch on its
+ // value. By default, we go to a block that just does an unwind (which is the
+ // correct action for a standard call).
+ BasicBlock *UnwindBlock =
+ BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
-
- // Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "jbuf_gep",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+ // check the return value of the setjmp. non-zero goes to dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values, and fill in the dispatch switch accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block, as prior to then, no function context has been
+ // created for this function and any unexpected exceptions thrown will go
+ // directly to the caller's context, which is what we want anyway, so no need
+ // to do anything here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ }
+ }
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
- // Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
- EntryBB->getTerminator());
-
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "dispatch",
- EntryBB->getTerminator());
- // check the return value of the setjmp. non-zero goes to dispatcher.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, DispatchVal, Zero,
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
- // Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FunctionContext, "",
- ContBlock->getTerminator());
- Register->setDoesNotThrow();
-
- // At this point, we are all set up, update the invoke instructions
- // to mark their call_site values, and fill in the dispatch switch
- // accordingly.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
- // Mark call instructions that aren't nounwind as no-action
- // (call_site == -1). Skip the entry block, as prior to then, no function
- // context has been created for this function and any unexpected exceptions
- // thrown will go directly to the caller's context, which is what we want
- // anyway, so no need to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore calls to the EH builtins (eh.selector, eh.exception)
- Constant *Callee = CI->getCalledFunction();
- if (Callee != SelectorFn && Callee != ExceptionFn
- && !CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
- }
- }
-
- // Replace all unwinds with a branch to the unwind handler.
- // ??? Should this ever happen with sjlj exceptions?
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(UnwindBlock, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
- // Following any allocas not in the entry block, update the saved SP
- // in the jmpbuf to the new value.
- for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
- Instruction *AI = JmpbufUpdatePoints[i];
- Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(AI);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
- }
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
- // Finally, for any returns from this function, if this function contains an
- // invoke, add a call to unregister the function context.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i)
- CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
}
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
return true;
}
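
The rewiring above (a builtin setjmp in the entry block, call_site stores before invokes, and a dispatch switch when unwinding re-enters the frame) is easier to follow against a C++-level sketch of the resulting control flow. The context layout, the registration stand-in, and the landing pad below are hypothetical and only illustrate the shape of SjLj dispatch, not the pass's actual runtime interface:

#include <csetjmp>
#include <cstdio>

struct FunctionContext {                  // hypothetical, minimal layout
  std::jmp_buf Buf;                       // filled in by setjmp in the entry block
  int CallSite;                           // call_site stored before each invoke, -1 = none
};

static FunctionContext *CurrentCtx = 0;   // stand-in for the SjLj EH runtime

static void mayThrow(bool DoThrow) {
  if (DoThrow && CurrentCtx)
    std::longjmp(CurrentCtx->Buf, 1);     // "unwind" back to the registered frame
}

static void sketch() {
  static FunctionContext FC;              // static storage keeps the setjmp rules simple
  FC.CallSite = -1;
  CurrentCtx = &FC;                       // stand-in for registering the context
  if (setjmp(FC.Buf) != 0) {
    // Dispatch block: a non-zero setjmp return means an exception unwound here.
    switch (FC.CallSite) {
    case 1:  std::puts("landing pad for invoke #1"); CurrentCtx = 0; return;
    default: std::puts("unexpected call site");      CurrentCtx = 0; return;
    }
  }
  FC.CallSite = 1;                        // the store inserted before the invoke
  mayThrow(true);
  CurrentCtx = 0;                         // stand-in for unregister on return
}

int main() { sketch(); return 0; }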
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index 1bc148f..6e3fa90 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -41,7 +41,7 @@ namespace {
char SlotIndexes::ID = 0;
INITIALIZE_PASS(SlotIndexes, "slotindexes",
- "Slot index numbering", false, false);
+ "Slot index numbering", false, false)
IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
return &*IndexListEntryEmptyKey;
@@ -61,7 +61,6 @@ void SlotIndexes::releaseMemory() {
mi2iMap.clear();
mbb2IdxMap.clear();
idx2MBBMap.clear();
- terminatorGaps.clear();
clearList();
}
@@ -112,13 +111,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
if (mi->isDebugValue())
continue;
- if (miItr == mbb->getFirstTerminator()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
-
// Insert a store index for the instr.
push_back(createEntry(mi, index));
@@ -135,15 +127,12 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
index += (Slots + 1) * SlotIndex::NUM;
}
- if (mbb->getFirstTerminator() == mbb->end()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
+ // We insert two blank instructions between basic blocks.
+ // One to represent live-out registers and one to represent live-ins.
+ push_back(createEntry(0, index));
+ index += SlotIndex::NUM;
- // One blank instruction at the end.
- push_back(createEntry(0, index));
+ push_back(createEntry(0, index));
SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
mbb2IdxMap.insert(
@@ -169,6 +158,7 @@ void SlotIndexes::renumberIndexes() {
// resulting numbering will match what would have been generated by the
// pass during the initial numbering of the function if the new instructions
// had been present.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
functionSize = 0;
unsigned index = 0;
@@ -179,7 +169,7 @@ void SlotIndexes::renumberIndexes() {
curEntry->setIndex(index);
if (curEntry->getInstr() == 0) {
- // MBB start entry or terminator gap. Just step index by 1.
+ // MBB start entry. Just step index by 1.
index += SlotIndex::NUM;
}
else {
@@ -214,11 +204,10 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
- os << entry().getIndex();
- if (isPHI())
- os << "*";
+ if (isValid())
+ os << entry().getIndex() << "LudS"[getSlot()];
else
- os << "LudS"[getSlot()];
+ os << "invalid";
}
// Dump a SlotIndex to stderr.
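
To make the new inter-block numbering and the print format concrete, here is an illustrative layout for a two-instruction block followed by the start of the next block, assuming SlotIndex::NUM == 4 and no extra slots per instruction (both assumptions for illustration; exact offsets depend on per-instruction slot counts):

  index 16:  first instruction  (a SlotIndex here prints as 16L/16u/16d/16S)
  index 20:  second instruction
  index 24:  blank entry representing the block's live-out registers
  index 28:  blank entry representing the next block's live-ins
  index 32:  first instruction of the next block

A valid SlotIndex now prints as its entry index followed by a slot letter from "LudS", e.g. 20d, and a default-constructed index prints as "invalid"; the removed form printed the bare index followed by '*' for PHI slots and the slot letter otherwise.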
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 0000000..9c0bf16
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,330 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
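
As a concrete illustration of the update rule described above, here is a stripped-down, self-contained model of the node update with the same dead zone around zero. The two-node network and its weights are invented for illustration; the real pass additionally normalizes link weights and biases by block frequency and uses edge bundles as nodes:

#include <cstdio>
#include <utility>
#include <vector>

struct ToyNode {
  float Bias;                                       // B_n
  float Value;                                      // V_n in {-1, 0, +1}
  std::vector<std::pair<float, unsigned> > Links;   // (F_b, neighbor index)

  ToyNode() : Bias(0), Value(0) {}

  bool update(const std::vector<ToyNode> &Nodes) {
    float Sum = Bias;
    for (unsigned i = 0, e = Links.size(); i != e; ++i)
      Sum += Links[i].first * Nodes[Links[i].second].Value;
    float Old = Value;
    const float Thres = 1e-4f;                      // dead zone around 0
    Value = Sum < -Thres ? -1.0f : Sum > Thres ? 1.0f : 0.0f;
    return Value != Old;
  }
};

int main() {
  // Two bundles joined by a transparent block of weight 0.5; node 0 prefers a
  // register (positive bias), node 1 is undecided and follows its neighbor.
  std::vector<ToyNode> N(2);
  N[0].Bias = 1.0f;
  N[0].Links.push_back(std::make_pair(0.5f, 1u));
  N[1].Links.push_back(std::make_pair(0.5f, 0u));
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (unsigned i = 0; i != N.size(); ++i)
      Changed |= N[i].update(N);
  }
  std::printf("V0=%g V1=%g\n", N[0].Value, N[1].Value); // both converge to +1
  return 0;
}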
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+  /// block frequency estimates mean that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Frequency[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+  /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Frequency[0] = Frequency[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+  /// the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w /= Frequency[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w /= Frequency[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I);
+ unsigned Num = I->getNumber();
+ nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq;
+ }
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+}
+
+
+/// prepareNodes - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::
+prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
+ for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number);
+ float Freq = getBlockFrequency(MBB);
+
+ // Is this a transparent block? Link ingoing and outgoing bundles.
+ if (I->Entry == DontCare && I->Exit == DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ unsigned ob = bundles->getBundle(I->Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ continue;
+ }
+
+ // This block is not transparent, but it can still add bias.
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// @param Linked - Numbers of linked nodes that need updating.
+void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ bool C = nodes[n].update(nodes);
+ Changed |= C;
+ }
+ if (!Changed)
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ bool C = nodes[n].update(nodes);
+ Changed |= C;
+ }
+ if (!Changed)
+ return;
+ }
+}
+
+bool
+SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+ BitVector &RegBundles) {
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+
+ // Compute active nodes, links and biases.
+ prepareNodes(LiveBlocks);
+
+ // Update all active nodes, and find the ones that are actually linked to
+ // something so their value may change when iterating.
+ SmallVector<unsigned, 8> Linked;
+ for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (!nodes[n].Links.empty() && !nodes[n].mustSpill())
+ Linked.push_back(n);
+ }
+
+ // Iterate the network to convergence.
+ iterate(Linked);
+
+ // Write preferences back to RegBundles.
+ bool Perfect = true;
+ for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n))
+ if (!nodes[n].preferReg()) {
+ RegBundles.reset(n);
+ Perfect = false;
+ }
+ return Perfect;
+}
+
+/// getBlockFrequency - Return our best estimate of the block frequency, which
+/// the expected number of block executions per function invocation.
+float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) {
+ // Use the unnormalized spill weight for real block frequencies.
+ return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB));
+}
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 0000000..ef2d516
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,108 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by placeSpills() which is called by the
+// register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+template <typename> class SmallVectorImpl;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the placeSpills
+ // caller.
+ BitVector *ActiveNodes;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+ };
+
+ /// placeSpills - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out. DontCare/DontCare means the variable is live
+ /// through the block. DontCare/X means the variable is live
+ /// out, but not live in.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled.
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+ BitVector &RegBundles);
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(const MachineBasicBlock*);
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+ void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
+ void iterate(const SmallVectorImpl<unsigned>&);
+};
+
+} // end namespace llvm
+
+#endif
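
A sketch of how a register-allocator client might drive this interface; only the BlockConstraint/placeSpills API comes from the header above, while the constraint policy and the helper function are invented for illustration:

#include "SpillPlacement.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Decide, per edge bundle, whether the current variable should stay in a
// register across basic block boundaries.
static bool planAcrossBlocks(SpillPlacement &SP,
                             const SmallVectorImpl<unsigned> &LiveBlockNums,
                             BitVector &RegBundles) {
  SmallVector<SpillPlacement::BlockConstraint, 8> LiveBlocks;
  for (unsigned i = 0, e = LiveBlockNums.size(); i != e; ++i) {
    SpillPlacement::BlockConstraint BC;
    BC.Number = LiveBlockNums[i];
    BC.Entry = SpillPlacement::PrefReg;   // hypothetical policy: register on entry
    BC.Exit = SpillPlacement::DontCare;   // exit constraint left open
    LiveBlocks.push_back(BC);
  }
  // One bit per edge bundle: set means "keep the variable in a register".
  // Returns true only when no Pref constraint had to be violated.
  return SP.placeSpills(LiveBlocks, RegBundles);
}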
diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp
index 59d5ab3..fd38582 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.cpp
+++ b/contrib/llvm/lib/CodeGen/Spiller.cpp
@@ -12,6 +12,7 @@
#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,7 +29,7 @@
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, splitting, inline_ };
+ enum SpillerName { trivial, standard, inline_ };
}
static cl::opt<SpillerName>
@@ -37,7 +38,6 @@ spillerOpt("spiller",
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
clEnumVal(standard, "default spiller"),
- clEnumVal(splitting, "splitting spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
cl::init(standard));
@@ -80,7 +80,7 @@ protected:
assert(li->weight != HUGE_VALF &&
"Attempting to spill already spilled value.");
- assert(!li->isStackSlot() &&
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
"Trying to spill a stack slot.");
DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
@@ -144,7 +144,7 @@ protected:
vrm->addSpillSlotUse(ss, loadInstr);
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
@@ -158,7 +158,7 @@ protected:
vrm->addSpillSlotUse(ss, storeInstr);
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
@@ -182,7 +182,7 @@ public:
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &) {
+ const SmallVectorImpl<LiveInterval*> &) {
// Ignore spillIs - we don't use it.
trivialSpillEverywhere(li, newIntervals);
}
@@ -195,315 +195,42 @@ namespace {
/// Falls back on LiveIntervals::addIntervalsForSpills.
class StandardSpiller : public Spiller {
protected:
+ MachineFunction *mf;
LiveIntervals *lis;
+ LiveStacks *lss;
MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
VirtRegMap &vrm)
- : lis(&pass.getAnalysis<LiveIntervals>()),
+ : mf(&mf),
+ lis(&pass.getAnalysis<LiveIntervals>()),
+ lss(&pass.getAnalysis<LiveStacks>()),
loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
+ const SmallVectorImpl<LiveInterval*> &spillIs) {
std::vector<LiveInterval*> added =
lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
newIntervals.insert(newIntervals.end(), added.begin(), added.end());
- }
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// When a call to spill is placed this spiller will first try to break the
-/// interval up into its component values (one new interval per value).
-/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
-class SplittingSpiller : public StandardSpiller {
-public:
- SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : StandardSpiller(pass, mf, vrm) {
- mri = &mf.getRegInfo();
- tii = mf.getTarget().getInstrInfo();
- tri = mf.getTarget().getRegisterInfo();
- }
- void spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
- if (worthTryingToSplit(li))
- tryVNISplit(li);
- else
- StandardSpiller::spill(li, newIntervals, spillIs);
+ // Update LiveStacks.
+ int SS = vrm->getStackSlot(li->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+ const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg);
+ LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+ if (!SI.hasAtLeastOneValue())
+ SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+ SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0));
}
-
-private:
-
- MachineRegisterInfo *mri;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
- DenseSet<LiveInterval*> alreadySplit;
-
- bool worthTryingToSplit(LiveInterval *li) const {
- return (!alreadySplit.count(li) && li->getNumValNums() > 1);
- }
-
- /// Try to break a LiveInterval into its component values.
- std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
-
- DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
-
- std::vector<LiveInterval*> added;
- SmallVector<VNInfo*, 4> vnis;
-
- std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
- for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
- vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
- VNInfo *vni = *vniItr;
-
- // Skip unused VNIs.
- if (vni->isUnused())
- continue;
-
- DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
- LiveInterval *splitInterval = extractVNI(li, vni);
-
- if (splitInterval != 0) {
- DEBUG(dbgs() << *splitInterval << "\n");
- added.push_back(splitInterval);
- alreadySplit.insert(splitInterval);
- } else {
- DEBUG(dbgs() << "0\n");
- }
- }
-
- DEBUG(dbgs() << "Original LI: " << *li << "\n");
-
-    // If the original interval still contains some live ranges,
-    // add it to added and alreadySplit.
- if (!li->empty()) {
- added.push_back(li);
- alreadySplit.insert(li);
- }
-
- return added;
- }
-
- /// Extract the given value number from the interval.
- LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
- assert(vni->isDefAccurate() || vni->isPHIDef());
-
- // Create a new vreg and live interval, copy VNI ranges over.
- const TargetRegisterClass *trc = mri->getRegClass(li->reg);
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
- VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
-
- // Start by copying all live ranges in the VN to the new interval.
- for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
- rItr != rEnd; ++rItr) {
- if (rItr->valno == vni) {
- newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
- }
- }
-
- // Erase the old VNI & ranges.
- li->removeValNo(vni);
-
- // Collect all current uses of the register belonging to the given VNI.
- // We'll use this to rename the register after we've dealt with the def.
- std::set<MachineInstr*> uses;
- for (MachineRegisterInfo::use_iterator
- useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
- useItr != useEnd; ++useItr) {
- uses.insert(&*useItr);
- }
-
- // Process the def instruction for this VNI.
- if (newVNI->isPHIDef()) {
-      // Insert a copy at the start of the MBB. The range preceding the
- // copy will be attached to the original LiveInterval.
- MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
- 0, false, lis->getVNInfoAllocator());
- phiDefVNI->setIsPHIDef(true);
- li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
- LiveRange *oldPHIDefRange =
- newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
-
- // If the old phi def starts in the middle of the range chop it up.
- if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
- LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
- oldPHIDefRange->valno);
- oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
- newLI->addRange(oldPHIDefRange2);
- } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
- // Otherwise if it's at the start of the range just trim it.
- oldPHIDefRange->start = copyIdx.getDefIndex();
- } else {
- assert(false && "PHI def range doesn't cover PHI def?");
- }
-
- newVNI->def = copyIdx.getDefIndex();
- newVNI->setCopy(copyMI);
- newVNI->setIsPHIDef(false); // not a PHI def anymore.
- newVNI->setIsDefAccurate(true);
- } else {
- // non-PHI def. Rename the def. If it's two-addr that means renaming the
- // use and inserting a new copy too.
- MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
- // We'll rename this now, so we can remove it from uses.
- uses.erase(defInst);
- unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
- bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
- twoAddrUseIsUndef = false;
-
- for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
- MachineOperand &mo = defInst->getOperand(i);
- if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
- mo.setReg(newVReg);
- if (isTwoAddr && mo.isUse() && mo.isUndef())
- twoAddrUseIsUndef = true;
- }
- }
-
- SlotIndex defIdx = lis->getInstructionIndex(defInst);
- newVNI->def = defIdx.getDefIndex();
-
- if (isTwoAddr && !twoAddrUseIsUndef) {
- MachineBasicBlock *defMBB = defInst->getParent();
- MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- LiveRange *origUseRange =
- li->getLiveRangeContaining(newVNI->def.getUseIndex());
- origUseRange->end = copyIdx.getDefIndex();
- VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
- true, lis->getVNInfoAllocator());
- LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- for (std::set<MachineInstr*>::iterator
- usesItr = uses.begin(), usesEnd = uses.end();
- usesItr != usesEnd; ++usesItr) {
- MachineInstr *useInst = *usesItr;
- SlotIndex useIdx = lis->getInstructionIndex(useInst);
- LiveRange *useRange =
- newLI->getLiveRangeContaining(useIdx.getUseIndex());
-
- // If this use doesn't belong to the new interval skip it.
- if (useRange == 0)
- continue;
-
- // This use doesn't belong to the VNI, skip it.
- if (useRange->valno != newVNI)
- continue;
-
- // Check if this instr is two address.
- unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
- bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
- // Rename uses (and defs for two-address instrs).
- for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
- MachineOperand &mo = useInst->getOperand(i);
- if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
- (mo.getReg() == li->reg)) {
- mo.setReg(newVReg);
- }
- }
-
- // If this is a two address instruction we've got some extra work to do.
- if (isTwoAddress) {
- // We modified the def operand, so we need to copy back to the original
- // reg.
- MachineBasicBlock *useMBB = useInst->getParent();
- MachineBasicBlock::iterator useItr(useInst);
- MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Change the old two-address defined range & vni to start at
- // (and be defined by) the copy.
- LiveRange *origDefRange =
- li->getLiveRangeContaining(useIdx.getDefIndex());
- origDefRange->start = copyIdx.getDefIndex();
- origDefRange->valno->def = copyIdx.getDefIndex();
- origDefRange->valno->setCopy(copyMI);
-
- // Insert a new range & vni for the two-address-to-copy value. This
- // will be attached to the new live interval.
- VNInfo *copyVNI =
- newLI->getNextValue(useIdx.getDefIndex(), 0, true,
- lis->getVNInfoAllocator());
- LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
- LRI != LRE; ++LRI) {
- if (LRI->valno != newVNI || LRI->end.isPHI())
- continue;
- SlotIndex killIdx = LRI->end;
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
- DebugLoc(), tii->get(TargetOpcode::COPY),
- li->reg)
- .addReg(newVReg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Save the current end. We may need it to add a new range if the
-      // current range runs off the end of the MBB.
- SlotIndex newKillRangeEnd = LRI->end;
- LRI->end = copyIdx.getDefIndex();
-
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
-
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
- }
- newVNI->setHasPHIKill(false);
-
- return newLI;
- }
-
};
} // end anonymous namespace
-
-namespace llvm {
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-}
-
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
@@ -511,7 +238,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
case standard: return new StandardSpiller(pass, mf, vrm);
- case splitting: return new SplittingSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
}
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
index 59bc0ec..f017583 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.h
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -10,14 +10,13 @@
#ifndef LLVM_CODEGEN_SPILLER_H
#define LLVM_CODEGEN_SPILLER_H
-#include "llvm/ADT/SmallVector.h"
-
namespace llvm {
class LiveInterval;
class MachineFunction;
class MachineFunctionPass;
class SlotIndex;
+ template <typename T> class SmallVectorImpl;
class VirtRegMap;
/// Spiller interface.
@@ -37,7 +36,7 @@ namespace llvm {
/// @param newIntervals The newly created intervals will be appended here.
virtual void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+ const SmallVectorImpl<LiveInterval*> &spillIs) = 0;
};
@@ -45,6 +44,13 @@ namespace llvm {
Spiller* createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+ /// of deferring though VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
}
#endif
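
A sketch of how a register-allocation pass might use the Spiller interface declared above; the helper function and the way li and spillIs are obtained are placeholders for illustration, only createSpiller()/spill() match the header:

#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

static void spillOne(MachineFunctionPass &pass, MachineFunction &mf,
                     VirtRegMap &vrm, LiveInterval *li) {
  // In practice the spiller is created once per function, not per interval.
  Spiller *spiller = createSpiller(pass, mf, vrm);  // honors the -spiller option
  SmallVector<LiveInterval*, 8> newIntervals;       // receives replacement ranges
  SmallVector<LiveInterval*, 8> spillIs;            // other intervals being spilled
  spiller->spill(li, newIntervals, spillIs);
  // newIntervals would now be re-queued with the allocator (not shown).
  delete spiller;
}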
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 29474f0..5663936 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "splitter"
+#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -36,371 +37,231 @@ AllowSplit("spiller-splits-edges",
// Split Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
const LiveIntervals &lis,
const MachineLoopInfo &mli)
- : mf_(mf),
- lis_(lis),
- loops_(mli),
- tii_(*mf.getTarget().getInstrInfo()),
- curli_(0) {}
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0) {}
void SplitAnalysis::clear() {
- usingInstrs_.clear();
- usingBlocks_.clear();
- usingLoops_.clear();
- curli_ = 0;
+ UseSlots.clear();
+ UsingInstrs.clear();
+ UsingBlocks.clear();
+ LiveBlocks.clear();
+ CurLI = 0;
}
bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
MachineBasicBlock *T, *F;
SmallVector<MachineOperand, 4> Cond;
- return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+ return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
}
-/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
void SplitAnalysis::analyzeUses() {
- const MachineRegisterInfo &MRI = mf_.getRegInfo();
- for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
- MachineInstr *MI = I.skipInstruction();) {
- if (MI->isDebugValue() || !usingInstrs_.insert(MI))
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg),
+ E = MRI.reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isUse() && MO.isUndef())
continue;
- MachineBasicBlock *MBB = MI->getParent();
- if (usingBlocks_[MBB]++)
+ MachineInstr *MI = MO.getParent();
+ if (MI->isDebugValue() || !UsingInstrs.insert(MI))
continue;
- if (MachineLoop *Loop = loops_.getLoopFor(MBB))
- usingLoops_[Loop]++;
+ UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex());
+ MachineBasicBlock *MBB = MI->getParent();
+ UsingBlocks[MBB]++;
}
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+ calcLiveBlockInfo();
DEBUG(dbgs() << " counted "
- << usingInstrs_.size() << " instrs, "
- << usingBlocks_.size() << " blocks, "
- << usingLoops_.size() << " loops.\n");
+ << UsingInstrs.size() << " instrs, "
+ << UsingBlocks.size() << " blocks.\n");
}
-/// removeUse - Update statistics by noting that MI no longer uses curli.
-void SplitAnalysis::removeUse(const MachineInstr *MI) {
- if (!usingInstrs_.erase(MI))
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+void SplitAnalysis::calcLiveBlockInfo() {
+ if (CurLI->empty())
return;
- // Decrement MBB count.
- const MachineBasicBlock *MBB = MI->getParent();
- BlockCountMap::iterator bi = usingBlocks_.find(MBB);
- assert(bi != usingBlocks_.end() && "MBB missing");
- assert(bi->second && "0 count in map");
- if (--bi->second)
- return;
- // No more uses in MBB.
- usingBlocks_.erase(bi);
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // The last split point is the latest possible insertion point that dominates
+ // all successor blocks. If interference reaches LastSplitPoint, it is not
+ // possible to insert a split or reload that makes CurLI live in the
+ // outgoing bundle.
+ MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB);
+ if (LSP == BI.MBB->end())
+ BI.LastSplitPoint = Stop;
+ else
+ BI.LastSplitPoint = LIS.getInstructionIndex(LSP);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+ if (!BI.LiveIn)
+ BI.Def = LVI->start;
+
+ // Find the first and last uses in the block.
+ BI.Uses = hasUses(MFI);
+ if (BI.Uses && UseI != UseE) {
+ BI.FirstUse = *UseI;
+ assert(BI.FirstUse >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastUse = UseI[-1];
+ assert(BI.LastUse < Stop);
+ }
- // Decrement loop count.
- MachineLoop *Loop = loops_.getLoopFor(MBB);
- if (!Loop)
- return;
- LoopCountMap::iterator li = usingLoops_.find(Loop);
- assert(li != usingLoops_.end() && "Loop missing");
- assert(li->second && "0 count in map");
- if (--li->second)
- return;
- // No more blocks in Loop.
- usingLoops_.erase(li);
-}
+ // Look for gaps in the live range.
+ bool hasGap = false;
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.Kill = LastStop;
+ BI.LiveOut = false;
+ break;
+ }
+ if (LastStop < LVI->start) {
+ hasGap = true;
+ BI.Kill = LastStop;
+ BI.Def = LVI->start;
+ }
+ }
-// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
-// predecessor blocks, and exit blocks.
-void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
- Blocks.clear();
-
- // Blocks in the loop.
- Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
-
- // Predecessor blocks.
- const MachineBasicBlock *Header = Loop->getHeader();
- for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
- E = Header->pred_end(); I != E; ++I)
- if (!Blocks.Loop.count(*I))
- Blocks.Preds.insert(*I);
-
- // Exit blocks.
- for (MachineLoop::block_iterator I = Loop->block_begin(),
- E = Loop->block_end(); I != E; ++I) {
- const MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if (!Blocks.Loop.count(*SI))
- Blocks.Exits.insert(*SI);
- }
-}
+ // Don't set LiveThrough when the block has a gap.
+ BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut;
+ LiveBlocks.push_back(BI);
-/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
-/// and around the Loop.
-SplitAnalysis::LoopPeripheralUse SplitAnalysis::
-analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
- LoopPeripheralUse use = ContainedInLoop;
- for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
- I != E; ++I) {
- const MachineBasicBlock *MBB = I->first;
- // Is this a peripheral block?
- if (use < MultiPeripheral &&
- (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
- if (I->second > 1) use = MultiPeripheral;
- else use = SinglePeripheral;
- continue;
- }
- // Is it a loop block?
- if (Blocks.Loop.count(MBB))
- continue;
- // It must be an unrelated block.
- return OutsideLoop;
- }
- return use;
-}
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
-/// getCriticalExits - It may be necessary to partially break critical edges
-/// leaving the loop if an exit block has phi uses of curli. Collect the exit
-/// blocks that need special treatment into CriticalExits.
-void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- CriticalExits.clear();
-
- // A critical exit block contains a phi def of curli, and has a predecessor
- // that is not in the loop nor a loop predecessor.
- // For such an exit block, the edges carrying the new variable must be moved
- // to a new pre-exit block.
- for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
- VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
- // This exit may not have curli live in at all. No need to split.
- if (!SuccVNI)
- continue;
- // If this is not a PHI def, it is either using a value from before the
- // loop, or a value defined inside the loop. Both are safe.
- if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
- continue;
- // This exit block does have a PHI. Does it also have a predecessor that is
- // not a loop block or loop predecessor?
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
- continue;
- // This is a critical exit block, and we need to split the exit edge.
- CriticalExits.insert(Succ);
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
break;
- }
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
}
}
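
The per-block classification above is easier to see on a toy model where slot indices are plain integers; the struct, helper, and numbers below are invented for illustration and only mirror the LiveIn/LiveOut/FirstUse/LastUse bookkeeping, not the real SlotIndex machinery:

#include <cstdio>
#include <utility>
#include <vector>

// Toy model: a block covering [Start, Stop), a live range as segments, and the
// sorted use positions, classified the way calcLiveBlockInfo() classifies them.
struct ToyBlockInfo {
  bool LiveIn, LiveOut, Uses;
  int FirstUse, LastUse;
};

static ToyBlockInfo classify(int Start, int Stop,
                             const std::vector<std::pair<int,int> > &Live,
                             const std::vector<int> &UseSlots) {
  ToyBlockInfo BI = {false, false, false, -1, -1};
  for (unsigned i = 0; i != Live.size(); ++i) {
    if (Live[i].first <= Start && Live[i].second > Start)  BI.LiveIn = true;
    if (Live[i].first < Stop  && Live[i].second >= Stop)   BI.LiveOut = true;
  }
  for (unsigned i = 0; i != UseSlots.size(); ++i)
    if (UseSlots[i] >= Start && UseSlots[i] < Stop) {
      if (!BI.Uses) BI.FirstUse = UseSlots[i];
      BI.Uses = true;
      BI.LastUse = UseSlots[i];
    }
  return BI;
}

int main() {
  // Value live across [4, 40), block spans [16, 32), with uses at 20 and 28.
  std::vector<std::pair<int,int> > Live(1, std::make_pair(4, 40));
  std::vector<int> Uses;
  Uses.push_back(8); Uses.push_back(20); Uses.push_back(28); Uses.push_back(36);
  ToyBlockInfo BI = classify(16, 32, Live, Uses);
  std::printf("LiveIn=%d LiveOut=%d FirstUse=%d LastUse=%d\n",
              BI.LiveIn, BI.LiveOut, BI.FirstUse, BI.LastUse);
  return 0;  // prints LiveIn=1 LiveOut=1 FirstUse=20 LastUse=28
}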
-/// canSplitCriticalExits - Return true if it is possible to insert new exit
-/// blocks before the blocks in CriticalExits.
-bool
-SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- // If we don't allow critical edge splitting, require no critical exits.
- if (!AllowSplit)
- return CriticalExits.empty();
-
- for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- // We want to insert a new pre-exit MBB before Succ, and change all the
- // in-loop blocks to branch to the pre-exit instead of Succ.
- // Check that all the in-loop predecessors can be changed.
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- // The external predecessors won't be altered.
- if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
- continue;
- if (!canAnalyzeBranch(Pred))
- return false;
- }
-
- // If Succ's layout predecessor falls through, that too must be analyzable.
- // We need to insert the pre-exit block in the gap.
- MachineFunction::const_iterator MFI = Succ;
- if (MFI == mf_.begin())
- continue;
- if (!canAnalyzeBranch(--MFI))
- return false;
+void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const {
+ for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ unsigned count = UsingBlocks.lookup(*I);
+ OS << " BB#" << (*I)->getNumber();
+ if (count)
+ OS << '(' << count << ')';
}
- // No problems found.
- return true;
}
void SplitAnalysis::analyze(const LiveInterval *li) {
clear();
- curli_ = li;
+ CurLI = li;
analyzeUses();
}
-const MachineLoop *SplitAnalysis::getBestSplitLoop() {
- assert(curli_ && "Call analyze() before getBestSplitLoop");
- if (usingLoops_.empty())
- return 0;
-
- LoopPtrSet Loops, SecondLoops;
- LoopBlocks Blocks;
- BlockPtrSet CriticalExits;
-
- // Find first-class and second class candidate loops.
- // We prefer to split around loops where curli is used outside the periphery.
- for (LoopCountMap::const_iterator I = usingLoops_.begin(),
- E = usingLoops_.end(); I != E; ++I) {
- const MachineLoop *Loop = I->first;
- getLoopBlocks(Loop, Blocks);
-
- // FIXME: We need an SSA updater to properly handle multiple exit blocks.
- if (Blocks.Exits.size() > 1) {
- DEBUG(dbgs() << " multiple exits from " << *Loop);
- continue;
- }
-
- LoopPtrSet *LPS = 0;
- switch(analyzeLoopPeripheralUse(Blocks)) {
- case OutsideLoop:
- LPS = &Loops;
- break;
- case MultiPeripheral:
- LPS = &SecondLoops;
- break;
- case ContainedInLoop:
- DEBUG(dbgs() << " contained in " << *Loop);
- continue;
- case SinglePeripheral:
- DEBUG(dbgs() << " single peripheral use in " << *Loop);
- continue;
- }
- // Will it be possible to split around this loop?
- getCriticalExits(Blocks, CriticalExits);
- DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from "
- << *Loop);
- if (!canSplitCriticalExits(Blocks, CriticalExits))
- continue;
- // This is a possible split.
- assert(LPS);
- LPS->insert(Loop);
- }
-
- DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + "
- << SecondLoops.size() << " candidate loops.\n");
-
- // If there are no first class loops available, look at second class loops.
- if (Loops.empty())
- Loops = SecondLoops;
- if (Loops.empty())
- return 0;
+//===----------------------------------------------------------------------===//
+// LiveIntervalMap
+//===----------------------------------------------------------------------===//
- // Pick the earliest loop.
- // FIXME: Are there other heuristics to consider?
- const MachineLoop *Best = 0;
- SlotIndex BestIdx;
- for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
- ++I) {
- SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
- if (!Best || Idx < BestIdx)
- Best = *I, BestIdx = Idx;
- }
- DEBUG(dbgs() << " getBestSplitLoop found " << *Best);
- return Best;
+// Work around the fact that the std::pair constructors are broken for pointer
+// pairs in some implementations. makeVV(x, 0) works.
+static inline std::pair<const VNInfo*, VNInfo*>
+makeVV(const VNInfo *a, VNInfo *b) {
+ return std::make_pair(a, b);
}
-/// getMultiUseBlocks - if curli has more than one use in a basic block, it
-/// may be an advantage to split curli for the duration of the block.
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
- // If curli is local to one block, there is no point to splitting it.
- if (usingBlocks_.size() <= 1)
- return false;
- // Add blocks with multiple uses.
- for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
- I != E; ++I)
- switch (I->second) {
- case 0:
- case 1:
- continue;
- case 2: {
- // It doesn't pay to split a 2-instr block if it redefines curli.
- VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first));
- VNInfo *VN2 =
- curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex());
- // live-in and live-out with a different value.
- if (VN1 && VN2 && VN1 != VN2)
- continue;
- } // Fall through.
- default:
- Blocks.insert(I->first);
- }
- return !Blocks.empty();
+void LiveIntervalMap::reset(LiveInterval *li) {
+ LI = li;
+ Values.clear();
+ LiveOutCache.clear();
}
-//===----------------------------------------------------------------------===//
-// LiveIntervalMap
-//===----------------------------------------------------------------------===//
+bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const {
+ ValueMap::const_iterator i = Values.find(ParentVNI);
+ return i != Values.end() && i->second == 0;
+}
-// defValue - Introduce a li_ def for ParentVNI that could be later than
+// defValue - Introduce a LI def for ParentVNI that could be later than
// ParentVNI->def.
VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(LI && "call reset first");
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
- assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
-
- // Is this a simple 1-1 mapping? Not likely.
- if (Idx == ParentVNI->def)
- return mapValue(ParentVNI, Idx);
-
- // This is a complex def. Mark with a NULL in valueMap.
- VNInfo *OldVNI =
- valueMap_.insert(
- ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second;
- // The static_cast<VNInfo *> is only needed to work around a bug in an
- // old version of the C++0x standard which the following compilers
- // implemented and have yet to fix:
- //
- // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
- // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
- //
- // If/When we move to C++0x, this can be replaced by nullptr.
- (void)OldVNI;
- assert(OldVNI == 0 && "Simple/Complex values mixed");
-
- // Should we insert a minimal snippet of VNI LiveRange, or can we count on
- // callers to do that? We need it for lookups of complex values.
- VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator());
+ assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+ // Preserve the PHIDef bit.
+ if (ParentVNI->isPHIDef() && Idx == ParentVNI->def)
+ VNI->setIsPHIDef(true);
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator,bool> InsP =
+ Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0));
+
+ // This is now a complex def. Mark with a NULL in valueMap.
+ if (!InsP.second)
+ InsP.first->second = 0;
+
return VNI;
}
+
// mapValue - Find the mapped value for ParentVNI at Idx.
// Potentially create phi-def values.
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx,
+ bool *simple) {
+ assert(LI && "call reset first");
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
- assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+ assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator,bool> InsP =
- valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0)));
- // The static_cast<VNInfo *> is only needed to work around a bug in an
- // old version of the C++0x standard which the following compilers
- // implemented and have yet to fix:
- //
- // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
- // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
- //
- // If/When we move to C++0x, this can be replaced by nullptr.
+ Values.insert(makeVV(ParentVNI, 0));
// This was an unknown value. Create a simple mapping.
- if (InsP.second)
- return InsP.first->second = li_.createValueCopy(ParentVNI,
- lis_.getVNInfoAllocator());
+ if (InsP.second) {
+ if (simple) *simple = true;
+ return InsP.first->second = LI->createValueCopy(ParentVNI,
+ LIS.getVNInfoAllocator());
+ }
+
// This was a simple mapped value.
- if (InsP.first->second)
+ if (InsP.first->second) {
+ if (simple) *simple = true;
return InsP.first->second;
+ }
// This is a complex mapped value. There may be multiple defs, and we may need
// to create phi-defs.
- MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx);
+ if (simple) *simple = false;
+ MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
assert(IdxMBB && "No MBB at Idx");
// Is there a def in the same MBB we can extend?
@@ -409,157 +270,260 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
// Now for the fun part. We know that ParentVNI potentially has multiple defs,
// and we may need to create even more phi-defs to preserve VNInfo SSA form.
- // Perform a depth-first search for predecessor blocks where we know the
- // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
-
- // Track MBBs where we have created or learned the dominating value.
- // This may change during the DFS as we create new phi-defs.
- typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap;
- MBBValueMap DomValue;
-
- for (idf_iterator<MachineBasicBlock*>
- IDFI = idf_begin(IdxMBB),
- IDFE = idf_end(IdxMBB); IDFI != IDFE;) {
- MachineBasicBlock *MBB = *IDFI;
- SlotIndex End = lis_.getMBBEndIdx(MBB);
-
- // We are operating on the restricted CFG where ParentVNI is live.
- if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) {
- IDFI.skipChildren();
- continue;
- }
-
- // Do we have a dominating value in this block?
- VNInfo *VNI = extendTo(MBB, End);
- if (!VNI) {
- ++IDFI;
- continue;
+ // Perform a search for all predecessor blocks where we know the dominating
+ // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
+ DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber()
+ << " at " << Idx << " in " << *LI << '\n');
+
+ // Blocks where LI should be live-in.
+ SmallVector<MachineDomTreeNode*, 16> LiveIn;
+ LiveIn.push_back(MDT[IdxMBB]);
+
+ // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != LiveIn.size(); ++i) {
+ MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ // Is this a known live-out block?
+ std::pair<LiveOutMap::iterator,bool> LOIP =
+ LiveOutCache.insert(std::make_pair(Pred, LiveOutPair()));
+ // Yes, we have been here before.
+ if (!LOIP.second) {
+ DEBUG(if (VNInfo *VNI = LOIP.first->second.first)
+ dbgs() << " known valno #" << VNI->id
+ << " at BB#" << Pred->getNumber() << '\n');
+ continue;
+ }
+
+ // Does Pred provide a live-out value?
+ SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot();
+ if (VNInfo *VNI = extendTo(Pred, Last)) {
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def);
+ DEBUG(dbgs() << " found valno #" << VNI->id
+ << " from BB#" << DefMBB->getNumber()
+ << " at BB#" << Pred->getNumber() << '\n');
+ LiveOutPair &LOP = LOIP.first->second;
+ LOP.first = VNI;
+ LOP.second = MDT[DefMBB];
+ continue;
+ }
+ // No, we need a live-in value for Pred as well
+ if (Pred != IdxMBB)
+ LiveIn.push_back(MDT[Pred]);
}
+ }
- // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs
- // as needed along the way.
- for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
- // Start from MBB's immediate successor. End at IdxMBB.
- MachineBasicBlock *Succ = IDFI.getPath(PI-1);
- std::pair<MBBValueMap::iterator, bool> InsP =
- DomValue.insert(MBBValueMap::value_type(Succ, VNI));
-
- // This is the first time we backtrack to Succ.
- if (InsP.second)
- continue;
-
- // We reached Succ again with the same VNI. Nothing is going to change.
- VNInfo *OVNI = InsP.first->second;
- if (OVNI == VNI)
- break;
+ // We may need to add phi-def values to preserve the SSA form.
+ // This is essentially the same iterative algorithm that SSAUpdater uses,
+ // except we already have a dominator tree, so we don't have to recompute it.
+ VNInfo *IdxVNI = 0;
+ unsigned Changes;
+ do {
+ Changes = 0;
+ DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n");
+ // Propagate live-out values down the dominator tree, inserting phi-defs when
+ // necessary. Since LiveIn was created by a BFS, going backwards makes it more
+ // likely for us to visit immediate dominators before their children.
+ for (unsigned i = LiveIn.size(); i; --i) {
+ MachineDomTreeNode *Node = LiveIn[i-1];
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom;
+
+ // Get the IDom live-out value.
+ if (!needPHI) {
+ LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock());
+ if (I != LiveOutCache.end())
+ IDomValue = I->second;
+ else
+ // If IDom is outside our set of live-out blocks, there must be new
+ // defs, and we need a phi-def here.
+ needPHI = true;
+ }
- // Succ already has a phi-def. No need to continue.
- SlotIndex Start = lis_.getMBBStartIdx(Succ);
- if (OVNI->def == Start)
- break;
+ // IDom dominates all of our predecessors, but it may not be the immediate
+ // dominator. Check if any of them have live-out values that are properly
+ // dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair Value = LiveOutCache[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (MDT.dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
- // We have a collision between the old and new VNI at Succ. That means
- // neither dominates and we need a new phi-def.
- VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- InsP.first->second = VNI;
-
- // Replace OVNI with VNI in the remaining path.
- for (; PI > 1 ; --PI) {
- MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
- if (I == DomValue.end() || I->second != OVNI)
- break;
- I->second = VNI;
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ DEBUG(dbgs() << " - BB#" << MBB->getNumber()
+ << " phi-def #" << VNI->id << " at " << Start << '\n');
+ // We no longer need LI to be live-in.
+ LiveIn.erase(LiveIn.begin()+(i-1));
+ // Blocks in LiveIn are either IdxMBB, or have a value live-through.
+ if (MBB == IdxMBB)
+ IdxVNI = VNI;
+ // Check if we need to update live-out info.
+ LiveOutMap::iterator I = LiveOutCache.find(MBB);
+ if (I == LiveOutCache.end() || I->second.second == Node) {
+ // We already have a live-out defined in MBB, so this must be IdxMBB.
+ assert(MBB == IdxMBB && "Adding phi-def to known live-out");
+ LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI));
+ } else {
+ // This phi-def is also live-out, so color the whole block.
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ I->second = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value for IdxMBB.
+ if (MBB == IdxMBB)
+ IdxVNI = IDomValue.first;
+ // Propagate IDomValue if needed:
+ // MBB is live-out and doesn't define its own value.
+ LiveOutMap::iterator I = LiveOutCache.find(MBB);
+ if (I != LiveOutCache.end() && I->second.second != Node &&
+ I->second.first != IDomValue.first) {
+ ++Changes;
+ I->second = IDomValue;
+ DEBUG(dbgs() << " - BB#" << MBB->getNumber()
+ << " idom valno #" << IDomValue.first->id
+ << " from BB#" << IDom->getBlock()->getNumber() << '\n');
+ }
}
}
+ DEBUG(dbgs() << " - made " << Changes << " changes.\n");
+ } while (Changes);
- // No need to search the children, we found a dominating value.
- IDFI.skipChildren();
- }
+ assert(IdxVNI && "Didn't find value for Idx");
- // The search should at least find a dominating value for IdxMBB.
- assert(!DomValue.empty() && "Couldn't find a reaching definition");
+#ifndef NDEBUG
+ // Check the LiveOutCache invariants.
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+ I != E; ++I) {
+ assert(I->first && "Null MBB entry in cache");
+ assert(I->second.first && "Null VNInfo in cache");
+ assert(I->second.second && "Null DomTreeNode in cache");
+ if (I->second.second->getBlock() == I->first)
+ continue;
+ for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+ PE = I->first->pred_end(); PI != PE; ++PI)
+ assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant");
+ }
+#endif
- // Since we went through the trouble of a full DFS visiting all reaching defs,
- // the values in DomValue are now accurate. No more phi-defs are needed for
- // these blocks, so we can color the live ranges.
+ // Since we went through the trouble of a full BFS visiting all reaching defs,
+ // the values in LiveIn are now accurate. No more phi-defs are needed
+ // for these blocks, so we can color the live ranges.
// This makes the next mapValue call much faster.
- VNInfo *IdxVNI = 0;
- for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
- ++I) {
- MachineBasicBlock *MBB = I->first;
- VNInfo *VNI = I->second;
- SlotIndex Start = lis_.getMBBStartIdx(MBB);
- if (MBB == IdxMBB) {
- // Don't add full liveness to IdxMBB, stop at Idx.
- if (Start != Idx)
- li_.addRange(LiveRange(Start, Idx, VNI));
- // The caller had better add some liveness to IdxVNI, or it leaks.
- IdxVNI = VNI;
- } else
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ VNInfo *VNI = LiveOutCache.lookup(MBB).first;
+
+ // Anything in LiveIn other than IdxMBB is live-through.
+ // In IdxMBB, we should stop at Idx unless the same value is live-out.
+ if (MBB == IdxMBB && IdxVNI != VNI)
+ LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI));
+ else
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
}
- assert(IdxVNI && "Didn't find value for Idx");
return IdxVNI;
}
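// Editor's sketch, not part of the patch: the loop above uses LiveOutCache
// both as a memo of known live-out values and as the visited set for its
// predecessor BFS -- the bool returned by DenseMap::insert() says whether a
// block is being seen for the first time. A simplified, self-contained
// illustration of that pattern, with hypothetical Block/Value types standing
// in for MachineBasicBlock/VNInfo and without the extendTo() check the real
// code performs on each predecessor:

#include <unordered_map>
#include <vector>

namespace splitkit_sketch {
struct Value;                                // stand-in for VNInfo
struct Block { std::vector<Block*> preds; }; // stand-in for MachineBasicBlock

// Walk backwards from Start, queueing every block that still needs a live-in
// value. Cache doubles as the visited set, exactly like LiveOutCache above.
std::vector<Block*> collectLiveIn(Block *Start,
                                  std::unordered_map<Block*, Value*> &Cache) {
  std::vector<Block*> LiveIn;
  LiveIn.push_back(Start);
  for (unsigned i = 0; i != LiveIn.size(); ++i)
    for (Block *Pred : LiveIn[i]->preds) {
      auto Ins = Cache.insert({Pred, nullptr});
      if (!Ins.second)
        continue;               // already visited: value known or queued
      LiveIn.push_back(Pred);   // needs a live-in value as well
    }
  return LiveIn;
}
} // namespace splitkit_sketch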
-// extendTo - Find the last li_ value defined in MBB at or before Idx. The
-// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+#ifndef NDEBUG
+void LiveIntervalMap::dumpCache() {
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+ I != E; ++I) {
+ assert(I->first && "Null MBB entry in cache");
+ assert(I->second.first && "Null VNInfo in cache");
+ assert(I->second.second && "Null DomTreeNode in cache");
+ dbgs() << " cache: BB#" << I->first->getNumber()
+ << " has valno #" << I->second.first->id << " from BB#"
+ << I->second.second->getBlock()->getNumber() << ", preds";
+ for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+ PE = I->first->pred_end(); PI != PE; ++PI)
+ dbgs() << " BB#" << (*PI)->getNumber();
+ dbgs() << '\n';
+ }
+ dbgs() << " cache: " << LiveOutCache.size() << " entries.\n";
+}
+#endif
+
+// extendTo - Find the last LI value defined in MBB at or before Idx. The
+// ParentLI is assumed to be live at Idx. Extend the live range to Idx.
// Return the found VNInfo, or NULL.
-VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
- LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
- if (I == li_.begin())
+VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) {
+ assert(LI && "call reset first");
+ LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx);
+ if (I == LI->begin())
return 0;
--I;
- if (I->start < lis_.getMBBStartIdx(MBB))
+ if (I->end <= LIS.getMBBStartIdx(MBB))
return 0;
- if (I->end < Idx)
- I->end = Idx;
+ if (I->end <= Idx)
+ I->end = Idx.getNextSlot();
return I->valno;
}
-// addSimpleRange - Add a simple range from parentli_ to li_.
+// addSimpleRange - Add a simple range from ParentLI to LI.
// ParentVNI must be live in the [Start;End) interval.
void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
const VNInfo *ParentVNI) {
- VNInfo *VNI = mapValue(ParentVNI, Start);
- // A simple mappoing is easy.
- if (VNI->def == ParentVNI->def) {
- li_.addRange(LiveRange(Start, End, VNI));
+ assert(LI && "call reset first");
+ bool simple;
+ VNInfo *VNI = mapValue(ParentVNI, Start, &simple);
+ // A simple mapping is easy.
+ if (simple) {
+ LI->addRange(LiveRange(Start, End, VNI));
return;
}
// ParentVNI is a complex value. We must map per MBB.
- MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
- MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot());
if (MBB == MBBE) {
- li_.addRange(LiveRange(Start, End, VNI));
+ LI->addRange(LiveRange(Start, End, VNI));
return;
}
// First block.
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
// Run sequence of full blocks.
for (++MBB; MBB != MBBE; ++MBB) {
- Start = lis_.getMBBStartIdx(MBB);
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
- mapValue(ParentVNI, Start)));
+ Start = LIS.getMBBStartIdx(MBB);
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB),
+ mapValue(ParentVNI, Start)));
}
// Final block.
- Start = lis_.getMBBStartIdx(MBB);
+ Start = LIS.getMBBStartIdx(MBB);
if (Start != End)
- li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+ LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
}
-/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
/// All needed values whose def is not inside [Start;End) must be defined
/// beforehand so mapValue will work.
void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
- LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+ assert(LI && "call reset first");
+ LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end();
LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
// Check if --I begins before Start and overlaps.
@@ -575,403 +539,374 @@ void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
addSimpleRange(I->start, std::min(End, I->end), I->valno);
}
+
//===----------------------------------------------------------------------===//
// Split Editor
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
- SmallVectorImpl<LiveInterval*> &intervals)
- : sa_(sa), lis_(lis), vrm_(vrm),
- mri_(vrm.getMachineFunction().getRegInfo()),
- tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
- curli_(sa_.getCurLI()),
- dupli_(0), openli_(0),
- intervals_(intervals),
- firstInterval(intervals_.size())
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt,
+ LiveRangeEdit &edit)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(edit),
+ OpenIdx(0),
+ RegAssign(Allocator)
{
- assert(curli_ && "SplitEditor created from empty SplitAnalysis");
-
- // Make sure curli_ is assigned a stack slot, so all our intervals get the
- // same slot as curli_.
- if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
- vrm_.assignVirt2StackSlot(curli_->reg);
-
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit.anyRematerializable(LIS, TII, 0);
}
-LiveInterval *SplitEditor::createInterval() {
- unsigned curli = sa_.getCurLI()->reg;
- unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
- LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
- vrm_.grow();
- vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
- return &Intv;
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
}
-LiveInterval *SplitEditor::getDupLI() {
- if (!dupli_) {
- // Create an interval for dupli that is a copy of curli.
- dupli_ = createInterval();
- dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit.get(RegIdx);
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) {
+ Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI);
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit.getReg());
+ Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex();
}
- return dupli_;
-}
-VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
- VNInfo *&VNI = valueMap_[curliVNI];
- if (!VNI)
- VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
- return VNI;
-}
+ // Define the value in Reg.
+ VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def);
+ VNI->setCopy(CopyMI);
-/// Insert a COPY instruction curli -> li. Allocate a new value from li
-/// defined by the COPY. Note that rewrite() will deal with the curli
-/// register, so this function can be used to copy from any interval - openli,
-/// curli, or dupli.
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
- LI.reg).addReg(curli_->reg);
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+ // Add minimal liveness for the new value.
+ Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ return VNI;
}
/// Create a new virtual register and live interval.
void SplitEditor::openIntv() {
- assert(!openli_ && "Previous LI not closed before openIntv");
- openli_ = createInterval();
- intervals_.push_back(openli_);
- liveThrough_ = false;
-}
+ assert(!OpenIdx && "Previous LI not closed before openIntv");
-/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
-/// not live before Idx, a COPY is not inserted.
-void SplitEditor::enterIntvBefore(SlotIndex Idx) {
- assert(openli_ && "openIntv not called before enterIntvBefore");
-
- // Copy from curli_ if it is live.
- if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
- MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
- assert(MI && "enterIntvBefore called with invalid index");
- VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
- openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
-
- // Make sure CurVNI is properly mapped.
- VNInfo *&mapVNI = valueMap_[CurVNI];
- // We dont have SSA update yet, so only one entry per value is allowed.
- assert(!mapVNI && "enterIntvBefore called more than once for the same value");
- mapVNI = VNI;
+ // Create the complement as index 0.
+ if (Edit.empty()) {
+ Edit.create(MRI, LIS, VRM);
+ LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+ LIMappers.back().reset(Edit.get(0));
}
- DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n');
-}
-/// enterIntvAtEnd - Enter openli at the end of MBB.
-/// PhiMBB is a successor inside openli where a PHI value is created.
-/// Currently, all entries must share the same PhiMBB.
-void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
- assert(openli_ && "openIntv not called before enterIntvAtEnd");
-
- SlotIndex EndA = lis_.getMBBEndIdx(&A);
- VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
- if (!CurVNIA) {
- DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#"
- << A.getNumber() << ".\n");
- return;
- }
+ // Create the open interval.
+ OpenIdx = Edit.size();
+ Edit.create(MRI, LIS, VRM);
+ LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+ LIMappers[OpenIdx].reset(Edit.get(OpenIdx));
+}
- // Add a phi kill value and live range out of A.
- VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
- openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
-
- // FIXME: If this is the only entry edge, we don't need the extra PHI value.
- // FIXME: If there are multiple entry blocks (so not a loop), we need proper
- // SSA update.
-
- // Now look at the start of B.
- SlotIndex StartB = lis_.getMBBStartIdx(&B);
- SlotIndex EndB = lis_.getMBBEndIdx(&B);
- const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
- if (!CurB) {
- DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#"
- << B.getNumber() << ".\n");
- return;
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
- VNInfo *VNIB = openli_->getVNInfoAt(StartB);
- if (!VNIB) {
- // Create a phi value.
- VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
- lis_.getVNInfoAllocator());
- VNIB->setIsPHIDef(true);
- VNInfo *&mapVNI = valueMap_[CurB->valno];
- if (mapVNI) {
- // Multiple copies - must create PHI value.
- abort();
- } else {
- // This is the first copy of dupLR. Mark the mapping.
- mapVNI = VNIB;
- }
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
}
-
- DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n');
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ LIS.getLastSplitPoint(Edit.getParent(), &MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
}
-/// useIntv - indicate that all instructions in MBB should use openli.
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
- useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
}
void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
- assert(openli_ && "openIntv not called before useIntv");
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
- // Map the curli values from the interval into openli_
- LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
- LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
- if (I != B) {
- --I;
- // I begins before Start, but overlaps.
- if (I->end > Start)
- openli_->addRange(LiveRange(Start, std::min(End, I->end),
- mapValue(I->valno)));
- ++I;
+ // The interval must be live beyond the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
- // The remaining ranges begin after Start.
- for (;I != E && I->start < End; ++I)
- openli_->addRange(LiveRange(I->start, std::min(End, I->end),
- mapValue(I->valno)));
- DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_
- << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
}
-/// leaveIntvAfter - Leave openli after the instruction at Idx.
-void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
- assert(openli_ && "openIntv not called before leaveIntvAfter");
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
- const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
- if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n");
- return;
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
- // Was this value of curli live through openli?
- if (!openli_->liveAt(CurLR->valno->def)) {
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n");
- liveThrough_ = true;
- return;
- }
-
- // We are going to insert a back copy, so we must have a dupli_.
- LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
- assert(DupLR && "dupli not live into black, but curli is?");
-
- // Insert the COPY instruction.
- MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
- MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
- tii_.get(TargetOpcode::COPY), dupli_->reg)
- .addReg(openli_->reg);
- SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
- mapValue(CurLR->valno)));
- DupLR->valno->def = CopyIdx;
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
}
-/// leaveIntvAtTop - Leave the interval at the top of MBB.
-/// Currently, only one value can leave the interval.
-void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
- assert(openli_ && "openIntv not called before leaveIntvAtTop");
-
- SlotIndex Start = lis_.getMBBStartIdx(&MBB);
- const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
-
- // Is curli even live-in to MBB?
- if (!CurLR) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
- return;
- }
-
- // Is curli defined by PHI at the beginning of MBB?
- bool isPHIDef = CurLR->valno->isPHIDef() &&
- CurLR->valno->def.getBaseIndex() == Start;
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
- // If MBB is using a value of curli that was defined outside the openli range,
- // we don't want to copy it back here.
- if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start
- << ": using external value\n");
- liveThrough_ = true;
- return;
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
}
- // We are going to insert a back copy, so we must have a dupli_.
- LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
- assert(DupLR && "dupli not live into black, but curli is?");
-
- // Insert the COPY instruction.
- MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
- tii_.get(TargetOpcode::COPY), dupli_->reg)
- .addReg(openli_->reg);
- SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-
- // Adjust dupli and openli values.
- if (isPHIDef) {
- // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
- // and shift the dupli def down to the COPY.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- dupli_->removeRange(Start, Idx);
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- } else {
- // The dupli value was defined somewhere inside the openli range.
- DEBUG(dbgs() << " leaveIntvAtTop source value defined at "
- << DupLR->valno->def << "\n");
- // FIXME: We may not need a PHI here if all predecessors have the same
- // value.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
-
- // closeIntv is going to remove the superfluous live ranges.
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- }
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
- DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ assert(Edit.getParent().getVNInfoAt(Start) ==
+ Edit.getParent().getVNInfoAt(End.getPrevSlot()) &&
+ "Parent changes value in extended range");
+ assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // Treat this as useIntv() for now. The complement interval will be extended
+ // as needed by mapValue().
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
}
/// closeIntv - Indicate that we are done editing the currently open
/// LiveInterval, and ranges can be trimmed.
void SplitEditor::closeIntv() {
- assert(openli_ && "openIntv not called before closeIntv");
-
- DEBUG(dbgs() << " closeIntv cleaning up\n");
- DEBUG(dbgs() << " open " << *openli_ << '\n');
-
- if (liveThrough_) {
- DEBUG(dbgs() << " value live through region, leaving dupli as is.\n");
- } else {
- // live out with copies inserted, or killed by region. Either way we need to
- // remove the overlapping region from dupli.
- getDupLI();
- for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
- I != E; ++I) {
- dupli_->removeRange(I->start, I->end);
- }
- // FIXME: A block branching to the entry block may also branch elsewhere
- // curli is live. We need both openli and curli to be live in that case.
- DEBUG(dbgs() << " dup2 " << *dupli_ << '\n');
- }
- openli_ = 0;
- valueMap_.clear();
+ assert(OpenIdx && "openIntv not called before closeIntv");
+ OpenIdx = 0;
}
-/// rewrite - after all the new live ranges have been created, rewrite
-/// instructions using curli to use the new intervals.
-void SplitEditor::rewrite() {
- assert(!openli_ && "Previous LI not closed before rewrite");
- const LiveInterval *curli = sa_.getCurLI();
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
- RE = mri_.reg_end(); RI != RE;) {
+/// rewriteAssigned - Rewrite all uses of Edit.getReg().
+void SplitEditor::rewriteAssigned() {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
if (MI->isDebugValue()) {
DEBUG(dbgs() << "Zapping " << *MI);
- // FIXME: We can do much better with debug values.
MO.setReg(0);
continue;
}
- SlotIndex Idx = lis_.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
- LiveInterval *LI = dupli_;
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval *testli = intervals_[i];
- if (testli->liveAt(Idx)) {
- LI = testli;
- break;
- }
- }
- if (LI) {
- MO.setReg(LI->reg);
- sa_.removeUse(MI);
- DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI);
- }
- }
- // dupli_ goes in last, after rewriting.
- if (dupli_) {
- if (dupli_->empty()) {
- DEBUG(dbgs() << " dupli became empty?\n");
- lis_.removeInterval(dupli_->reg);
- dupli_ = 0;
- } else {
- dupli_->RenumberValues(lis_);
- intervals_.push_back(dupli_);
+ // <undef> operands don't really read the register, so just assign them to
+ // the complement.
+ if (MO.isUse() && MO.isUndef()) {
+ MO.setReg(Edit.get(0)->reg);
+ continue;
}
+
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ MO.setReg(Edit.get(RegIdx)->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx.
+ const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ LIMappers[RegIdx].mapValue(ParentVNI, Idx);
}
+}
- // Calculate spill weight and allocation hints for new intervals.
- VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval &li = *intervals_[i];
- vrai.CalculateRegClass(li.reg);
- vrai.CalculateWeightAndHint(li);
- DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName()
- << ":" << li << '\n');
+/// rewriteComponents - Rewrite uses of Intvs[0] according to the ConEQ mapping.
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+ const ConnectedVNInfoEqClasses &ConEq) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MO.isUse() && MO.isUndef())
+ continue;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':');
+ const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
+ assert(VNI && "Interval not live at use.");
+ MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
+ DEBUG(dbgs() << VNI->id << '\t' << *MI);
}
}
+void SplitEditor::finish() {
+ assert(OpenIdx == 0 && "Previous LI not closed before rewrite");
-//===----------------------------------------------------------------------===//
-// Loop Splitting
-//===----------------------------------------------------------------------===//
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
-bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
- SplitAnalysis::LoopBlocks Blocks;
- sa_.getLoopBlocks(Loop, Blocks);
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+ E = Edit.getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)];
+ VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def);
+ LIM.getLI()->addRange(LiveRange(ParentVNI->def,
+ ParentVNI->def.getNextSlot(), VNI));
+ // Mark all values as complex to force liveness computation.
+ // This should really only be necessary for remat victims, but we are lazy.
+ LIM.markComplexMapped(ParentVNI);
+ }
- // Break critical edges as needed.
- SplitAnalysis::BlockPtrSet CriticalExits;
- sa_.getCriticalExits(Blocks, CriticalExits);
- assert(CriticalExits.empty() && "Cannot break critical exits yet");
+#ifndef NDEBUG
+ // Every new interval must have a def by now, otherwise the split is bogus.
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+ assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
+#endif
+
+ // FIXME: Don't recompute the liveness of all values, infer it from the
+ // overlaps between the parent live interval and RegAssign.
+ // The mapValue algorithm is only necessary when:
+ // - The parent value maps to multiple defs, and new phis are needed, or
+ // - The value has been rematerialized before some uses, and we want to
+ // minimize the live range so it only reaches the remaining uses.
+ // All other values have simple liveness that can be computed from RegAssign
+ // and the parent live interval.
+
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+ E = Edit.getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveIntervalMap &LIM = LIMappers[RegIdx];
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def
+ << " -> " << RegIdx << '\n');
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+ DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End);
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) {
+ DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n");
+ assert(RegAssign.lookup(End) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LIM.mapValue(VNI, End);
+ }
+ else
+ DEBUG(dbgs() << " is not live-out\n");
+ }
+ DEBUG(dbgs() << " " << *LIM.getLI() << '\n');
+ }
- // Create new live interval for the loop.
- openIntv();
+ // Rewrite instructions.
+ rewriteAssigned();
- // Insert copies in the predecessors.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
- E = Blocks.Preds.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- enterIntvAtEnd(MBB, *Loop->getHeader());
- }
+ // FIXME: Delete defs that were rematted everywhere.
- // Switch all loop blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
- E = Blocks.Loop.end(); I != E; ++I)
- useIntv(**I);
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
- // Insert back copies in the exit blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
- E = Blocks.Exits.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- leaveIntvAtTop(MBB);
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit.size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit.get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned i = 1; i != NumComp; ++i)
+ dups.push_back(&Edit.create(MRI, LIS, VRM));
+ rewriteComponents(dups, ConEQ);
+ ConEQ.Distribute(&dups[0]);
}
- // Done.
- closeIntv();
- rewrite();
- return dupli_;
+ // Calculate spill weight and allocation hints for new intervals.
+ VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops);
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){
+ LiveInterval &li = **I;
+ vrai.CalculateRegClass(li.reg);
+ vrai.CalculateWeightAndHint(li);
+ DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName()
+ << ":" << li << '\n');
+ }
}
@@ -979,45 +914,50 @@ bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
// Single Block Splitting
//===----------------------------------------------------------------------===//
-/// splitSingleBlocks - Split curli into a separate live interval inside each
-/// basic block in Blocks. Return true if curli has been completely replaced,
-/// false if curli is still intact, and needs to be spilled or split further.
-bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
- DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
- // Determine the first and last instruction using curli in each block.
- typedef std::pair<SlotIndex,SlotIndex> IndexPair;
- typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
- IndexPairMap MBBRange;
- for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
- E = sa_.usingInstrs_.end(); I != E; ++I) {
- const MachineBasicBlock *MBB = (*I)->getParent();
- if (!Blocks.count(MBB))
+/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
+/// may be an advantage to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+ // If CurLI is local to one block, there is no point to splitting it.
+ if (LiveBlocks.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
+ const BlockInfo &BI = LiveBlocks[i];
+ if (!BI.Uses)
continue;
- SlotIndex Idx = lis_.getInstructionIndex(*I);
- DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
- IndexPair &IP = MBBRange[MBB];
- if (!IP.first.isValid() || Idx < IP.first)
- IP.first = Idx;
- if (!IP.second.isValid() || Idx > IP.second)
- IP.second = Idx;
+ unsigned Instrs = UsingBlocks.lookup(BI.MBB);
+ if (Instrs <= 1)
+ continue;
+ if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+ continue;
+ Blocks.insert(BI.MBB);
}
+ return !Blocks.empty();
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
- // Create a new interval for each block.
- for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I) {
- IndexPair &IP = MBBRange[*I];
- DEBUG(dbgs() << " splitting for BB#" << (*I)->getNumber() << ": ["
- << IP.first << ';' << IP.second << ")\n");
- assert(IP.first.isValid() && IP.second.isValid());
+ for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+ const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+ if (!BI.Uses || !Blocks.count(BI.MBB))
+ continue;
openIntv();
- enterIntvBefore(IP.first);
- useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
- leaveIntvAfter(IP.second);
+ SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
+ if (BI.LastUse < BI.LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ } else {
+      // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastUse);
+ }
closeIntv();
}
- rewrite();
- return dupli_;
+ finish();
}
@@ -1025,31 +965,29 @@ bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
// Sub Block Splitting
//===----------------------------------------------------------------------===//
-/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
/// and it would pay to subdivide the interval inside that block, return it.
/// Otherwise return NULL. The returned block can be passed to
/// SplitEditor::splitInsideBlock.
const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
// The interval must be exclusive to one block.
- if (usingBlocks_.size() != 1)
+ if (UsingBlocks.size() != 1)
return 0;
  // Don't do this for less than 4 instructions. We want to be sure that
// splitting actually reduces the instruction count per interval.
- if (usingInstrs_.size() < 4)
+ if (UsingInstrs.size() < 4)
return 0;
- return usingBlocks_.begin()->first;
+ return UsingBlocks.begin()->first;
}
-/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-/// true if curli has been completely replaced, false if curli is still
-/// intact, and needs to be spilled or split further.
-bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
SmallVector<SlotIndex, 32> Uses;
- Uses.reserve(sa_.usingInstrs_.size());
- for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
- E = sa_.usingInstrs_.end(); I != E; ++I)
+ Uses.reserve(SA.UsingInstrs.size());
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
+ E = SA.UsingInstrs.end(); I != E; ++I)
if ((*I)->getParent() == MBB)
- Uses.push_back(lis_.getInstructionIndex(*I));
+ Uses.push_back(LIS.getInstructionIndex(*I));
DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
<< Uses.size() << " instructions.\n");
assert(Uses.size() >= 3 && "Need at least 3 instructions");
@@ -1077,21 +1015,16 @@ bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
// First interval before the gap. Don't create single-instr intervals.
if (bestPos > 1) {
openIntv();
- enterIntvBefore(Uses.front());
- useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
- leaveIntvAfter(Uses[bestPos-1]);
+ useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
closeIntv();
}
// Second interval after the gap.
if (bestPos < Uses.size()-1) {
openIntv();
- enterIntvBefore(Uses[bestPos]);
- useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
- leaveIntvAfter(Uses.back());
+ useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back()));
closeIntv();
}
- rewrite();
- return dupli_;
+ finish();
}
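The single-block splitting code above also documents the intended calling
sequence for the reworked SplitEditor interface: each region is bracketed by
openIntv()/closeIntv(), the enterIntv*/leaveIntv* calls now return SlotIndexes
that feed useIntv(), and a single finish() call replaces the old rewrite().
A hedged sketch of a caller, using only names introduced in this patch
(splitOneBlock itself is a hypothetical helper):

// Sketch only: split the live range inside one block that has uses.
void splitOneBlock(SplitEditor &SE, const SplitAnalysis::BlockInfo &BI) {
  SE.openIntv();                                      // start a new interval
  SlotIndex Start = SE.enterIntvBefore(BI.FirstUse);  // copy/remat before 1st use
  if (BI.LastUse < BI.LastSplitPoint) {
    SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); // leave after the last use
  } else {
    // The last use is past the last valid split point: leave early, then let
    // the new interval overlap the complement up to the last use.
    SlotIndex Stop = SE.leaveIntvBefore(BI.LastSplitPoint);
    SE.useIntv(Start, Stop);
    SE.overlapIntv(Stop, BI.LastUse);
  }
  SE.closeIntv();
}

// As in splitSingleBlocks() above, SE.finish() is called once after all blocks
// have been processed to rewrite instructions and compute the new live ranges.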
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index ddef746..5c34afd 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,125 +12,132 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SlotIndexes.h"
namespace llvm {
+class ConnectedVNInfoEqClasses;
class LiveInterval;
class LiveIntervals;
+class LiveRangeEdit;
class MachineInstr;
-class MachineLoop;
class MachineLoopInfo;
class MachineRegisterInfo;
class TargetInstrInfo;
+class TargetRegisterInfo;
class VirtRegMap;
class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class SplitAnalysis {
public:
- const MachineFunction &mf_;
- const LiveIntervals &lis_;
- const MachineLoopInfo &loops_;
- const TargetInstrInfo &tii_;
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
  // Instructions using the current register.
typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
- InstrPtrSet usingInstrs_;
+ InstrPtrSet UsingInstrs;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
- // The number of instructions using curli in each basic block.
+ // The number of instructions using CurLI in each basic block.
typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
- BlockCountMap usingBlocks_;
+ BlockCountMap UsingBlocks;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Transparent.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstUse; ///< First instr using current reg.
+ SlotIndex LastUse; ///< Last instr using current reg.
+ SlotIndex Kill; ///< Interval end point inside block.
+ SlotIndex Def; ///< Interval start point inside block.
+ /// Last possible point for splitting live ranges.
+ SlotIndex LastSplitPoint;
+ bool Uses; ///< Current reg has uses or defs in block.
+ bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above).
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ // Per-interference pattern scratch data.
+ bool OverlapEntry; ///< Interference overlaps entering interval.
+ bool OverlapExit; ///< Interference overlaps exiting interval.
+ };
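  // Editor's sketch, not part of the patch: one way to recover the template
  // number above from the flags (illustrative only, not used anywhere).
  static unsigned blockTemplate(const BlockInfo &BI) {
    if (!BI.Uses)                return 6; // transparent live-through
    if (BI.LiveThrough)          return 5; // live-through with uses or defs
    if (BI.LiveIn && BI.LiveOut) return 4; // live-in, kill, def, live-out
    if (BI.LiveIn)               return 2; // live-in, kill
    if (BI.LiveOut)              return 3; // def, live-out
    return 1;                              // internal to block
  }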
- // The number of basic block using curli in each loop.
- typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
- LoopCountMap usingLoops_;
+ /// Basic blocks where var is live. This array is parallel to
+ /// SpillConstraints.
+ SmallVector<BlockInfo, 8> LiveBlocks;
private:
// Current live interval.
- const LiveInterval *curli_;
+ const LiveInterval *CurLI;
- // Sumarize statistics by counting instructions using curli_.
+  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ void calcLiveBlockInfo();
+
/// canAnalyzeBranch - Return true if MBB ends in a branch that can be
/// analyzed.
bool canAnalyzeBranch(const MachineBasicBlock *MBB);
public:
- SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli);
- /// analyze - set curli to the specified interval, and analyze how it may be
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
/// split.
void analyze(const LiveInterval *li);
- /// removeUse - Update statistics by noting that mi no longer uses curli.
- void removeUse(const MachineInstr *mi);
-
- const LiveInterval *getCurLI() { return curli_; }
-
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
- typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
-
- // Sets of basic blocks surrounding a machine loop.
- struct LoopBlocks {
- BlockPtrSet Loop; // Blocks in the loop.
- BlockPtrSet Preds; // Loop predecessor blocks.
- BlockPtrSet Exits; // Loop exit blocks.
-
- void clear() {
- Loop.clear();
- Preds.clear();
- Exits.clear();
- }
- };
-
- // Calculate the block sets surrounding the loop.
- void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
-
- /// LoopPeripheralUse - how is a variable used in and around a loop?
- /// Peripheral blocks are the loop predecessors and exit blocks.
- enum LoopPeripheralUse {
- ContainedInLoop, // All uses are inside the loop.
- SinglePeripheral, // At most one instruction per peripheral block.
- MultiPeripheral, // Multiple instructions in some peripheral blocks.
- OutsideLoop // Uses outside loop periphery.
- };
-
- /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
- /// and around the Loop.
- LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
- /// getCriticalExits - It may be necessary to partially break critical edges
- /// leaving the loop if an exit block has phi uses of curli. Collect the exit
- /// blocks that need special treatment into CriticalExits.
- void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+ /// hasUses - Return true if MBB has any uses of CurLI.
+ bool hasUses(const MachineBasicBlock *MBB) const {
+ return UsingBlocks.lookup(MBB);
+ }
- /// canSplitCriticalExits - Return true if it is possible to insert new exit
- /// blocks before the blocks in CriticalExits.
- bool canSplitCriticalExits(const LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits);
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- /// getBestSplitLoop - Return the loop where curli may best be split to a
- /// separate register, or NULL.
- const MachineLoop *getBestSplitLoop();
+ // Print a set of blocks with use counts.
+ void print(const BlockPtrSet&, raw_ostream&) const;
/// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
- /// having curli split to a new live interval. Return true if Blocks can be
+ /// having CurLI split to a new live interval. Return true if Blocks can be
/// passed to SplitEditor::splitSingleBlocks.
bool getMultiUseBlocks(BlockPtrSet &Blocks);
- /// getBlockForInsideSplit - If curli is contained inside a single basic block,
- /// and it wou pay to subdivide the interval inside that block, return it.
- /// Otherwise return NULL. The returned block can be passed to
+ /// getBlockForInsideSplit - If CurLI is contained inside a single basic
+ /// block, and it would pay to subdivide the interval inside that block,
+ /// return it. Otherwise return NULL. The returned block can be passed to
/// SplitEditor::splitInsideBlock.
const MachineBasicBlock *getBlockForInsideSplit();
};
@@ -140,58 +147,102 @@ public:
/// interval that is a subset. Insert phi-def values as needed. This class is
/// used by SplitEditor to create new smaller LiveIntervals.
///
-/// parentli_ is the larger interval, li_ is the subset interval. Every value
-/// in li_ corresponds to exactly one value in parentli_, and the live range
-/// of the value is contained within the live range of the parentli_ value.
-/// Values in parentli_ may map to any number of openli_ values, including 0.
+/// ParentLI is the larger interval, LI is the subset interval. Every value
+/// in LI corresponds to exactly one value in ParentLI, and the live range
+/// of the value is contained within the live range of the ParentLI value.
+/// Values in ParentLI may map to any number of OpenLI values, including 0.
class LiveIntervalMap {
- LiveIntervals &lis_;
+ LiveIntervals &LIS;
+ MachineDominatorTree &MDT;
// The parent interval is never changed.
- const LiveInterval &parentli_;
+ const LiveInterval &ParentLI;
- // The child interval's values are fully contained inside parentli_ values.
- LiveInterval &li_;
+ // The child interval's values are fully contained inside ParentLI values.
+ LiveInterval *LI;
typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
- // Map parentli_ values to simple values in li_ that are defined at the same
- // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+ // Map ParentLI values to simple values in LI that are defined at the same
+ // SlotIndex, or NULL for ParentLI values that have complex LI defs.
// Note there is a difference between values mapping to NULL (complex), and
// values not present (unknown/unmapped).
- ValueMap valueMap_;
-
- // extendTo - Find the last li_ value defined in MBB at or before Idx. The
- // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
- // Return the found VNInfo, or NULL.
- VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
-
- // addSimpleRange - Add a simple range from parentli_ to li_.
- // ParentVNI must be live in the [Start;End) interval.
- void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+ ValueMap Values;
+
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap;
+
+ // LiveOutCache - Map each basic block where LI is live out to the live-out
+ // value and its defining block. One of these conditions shall be true:
+ //
+ // 1. !LiveOutCache.count(MBB)
+ // 2. LiveOutCache[MBB].second.getNode() == MBB
+ // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+ //
+ // This is only a cache, the values can be computed as:
+ //
+ // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+  //  Node = MDT[LIS.getMBBFromIndex(VNI->def)]
+ //
+  // The cache is also used as a visited set by mapValue().
+ LiveOutMap LiveOutCache;
+
+ // Dump the live-out cache to dbgs().
+ void dumpCache();
public:
LiveIntervalMap(LiveIntervals &lis,
- const LiveInterval &parentli,
- LiveInterval &li)
- : lis_(lis), parentli_(parentli), li_(li) {}
+ MachineDominatorTree &mdt,
+ const LiveInterval &parentli)
+ : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {}
+
+ /// reset - clear all data structures and start a new live interval.
+ void reset(LiveInterval *);
+
+ /// getLI - return the current live interval.
+ LiveInterval *getLI() const { return LI; }
- /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+ /// defValue - define a value in LI from the ParentLI value VNI and Idx.
/// Idx does not have to be ParentVNI->def, but it must be contained within
- /// ParentVNI's live range in parentli_.
- /// Return the new li_ value.
+ /// ParentVNI's live range in ParentLI.
+ /// Return the new LI value.
VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
- /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+ /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is
/// assumed that ParentVNI is live at Idx.
/// If ParentVNI has not been defined by defValue, it is assumed that
/// ParentVNI->def dominates Idx.
/// If ParentVNI has been defined by defValue one or more times, a value that
/// dominates Idx will be returned. This may require creating extra phi-def
- /// values and adding live ranges to li_.
- VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+ /// values and adding live ranges to LI.
+ /// If simple is not NULL, *simple will indicate if ParentVNI is a simply
+ /// mapped value.
+ VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0);
+
+ // extendTo - Find the last LI value defined in MBB at or before Idx. The
+ // parentli is assumed to be live at Idx. Extend the live range to include
+ // Idx. Return the found VNInfo, or NULL.
+ VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx);
+
+  /// isMapped - Return true if ParentVNI is a known mapped value. It may be a
+ /// simple 1-1 mapping or a complex mapping to later defs.
+ bool isMapped(const VNInfo *ParentVNI) const {
+ return Values.count(ParentVNI);
+ }
+
+ /// isComplexMapped - Return true if ParentVNI has received new definitions
+ /// with defValue.
+ bool isComplexMapped(const VNInfo *ParentVNI) const;
+
+ /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the
+ /// number of definitions.
+ void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; }
+
+ // addSimpleRange - Add a simple range from ParentLI to LI.
+ // ParentVNI must be live in the [Start;End) interval.
+ void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
- /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+ /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
/// All needed values whose def is not inside [Start;End) must be defined
/// beforehand so mapValue will work.
void addRange(SlotIndex Start, SlotIndex End);
@@ -207,115 +258,129 @@ public:
/// - Mark the ranges where the new interval is used with useIntv*
/// - Mark the places where the interval is exited with exitIntv*.
/// - Finish the current interval with closeIntv and repeat from 2.
-/// - Rewrite instructions with rewrite().
+/// - Rewrite instructions with finish().
///
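As a hypothetical illustration of the protocol above (a sketch, not code from the patch), a simple single-block split could drive the editor roughly as follows; SA, LIS, VRM, MDT, LREdit, FirstUse, and LastUse are assumed to be provided by the caller.

  SplitEditor SE(SA, LIS, VRM, MDT, LREdit);
  SE.openIntv();                                   // create a new interval
  SlotIndex Start = SE.enterIntvBefore(FirstUse);  // copy in before the first use
  SlotIndex End = SE.leaveIntvAfter(LastUse);      // copy out after the last use
  SE.useIntv(Start, End);                          // uses in between go to the new interval
  SE.closeIntv();                                  // done editing this interval
  SE.finish();                                     // compute the complement and rewrite uses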
class SplitEditor {
- SplitAnalysis &sa_;
- LiveIntervals &lis_;
- VirtRegMap &vrm_;
- MachineRegisterInfo &mri_;
- const TargetInstrInfo &tii_;
-
- /// curli_ - The immutable interval we are currently splitting.
- const LiveInterval *const curli_;
-
- /// dupli_ - Created as a copy of curli_, ranges are carved out as new
- /// intervals get added through openIntv / closeIntv. This is used to avoid
- /// editing curli_.
- LiveInterval *dupli_;
-
- /// Currently open LiveInterval.
- LiveInterval *openli_;
-
- /// createInterval - Create a new virtual register and LiveInterval with same
- /// register class and spill slot as curli.
- LiveInterval *createInterval();
-
- /// getDupLI - Ensure dupli is created and return it.
- LiveInterval *getDupLI();
-
- /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1
- /// mappings, and do not include values created by inserted copies.
- DenseMap<const VNInfo*, VNInfo*> valueMap_;
-
- /// mapValue - Return the openIntv value that corresponds to the given curli
- /// value.
- VNInfo *mapValue(const VNInfo *curliVNI);
-
- /// A dupli value is live through openIntv.
- bool liveThrough_;
-
- /// All the new intervals created for this split are added to intervals_.
- SmallVectorImpl<LiveInterval*> &intervals_;
-
- /// The index into intervals_ of the first interval we added. There may be
- /// others from before we got it.
- unsigned firstInterval;
-
- /// Insert a COPY instruction curli -> li. Allocate a new value from li
- /// defined by the COPY
- VNInfo *insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit &Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
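To make the comment above concrete, a hypothetical use of RegAssign might look like this (a sketch assuming IntervalMap's insert/lookup interface; Start, End, and Idx are illustrative SlotIndexes):

  RegAssign.insert(Start, End, OpenIdx);    // [Start;End) belongs to the open interval
  unsigned RegIdx = RegAssign.lookup(Idx);  // 0 selects the complement interval
  LiveInterval *IntvLI = Edit.get(RegIdx);  // interval that should be live at Idx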
+
+ /// LIMappers - One LiveIntervalMap for each interval in Edit.
+ SmallVector<LiveIntervalMap, 4> LIMappers;
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned();
+
+ /// rewriteComponents - Rewrite all uses of Intvs[0] according to the eq
+ /// classes in ConEq.
+ /// This must be done while Intvs[0] is still live at all uses, before calling
+ /// ConEq.Distribute().
+ void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+ const ConnectedVNInfoEqClasses &ConEq);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
- SmallVectorImpl<LiveInterval*> &newIntervals);
+ MachineDominatorTree&, LiveRangeEdit&);
/// getAnalysis - Get the corresponding analysis.
- SplitAnalysis &getAnalysis() { return sa_; }
+ SplitAnalysis &getAnalysis() { return SA; }
/// Create a new virtual register and live interval.
void openIntv();
- /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
- /// not live before Idx, a COPY is not inserted.
- void enterIntvBefore(SlotIndex Idx);
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
- /// enterIntvAtEnd - Enter openli at the end of MBB.
- /// PhiMBB is a successor inside openli where a PHI value is created.
- /// Currently, all entries must share the same PhiMBB.
- void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
- /// useIntv - indicate that all instructions in MBB should use openli.
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
void useIntv(const MachineBasicBlock &MBB);
- /// useIntv - indicate that all instructions in range should use openli.
+ /// useIntv - indicate that all instructions in range should use OpenLI.
void useIntv(SlotIndex Start, SlotIndex End);
- /// leaveIntvAfter - Leave openli after the instruction at Idx.
- void leaveIntvAfter(SlotIndex Idx);
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
/// leaveIntvAtTop - Leave the interval at the top of MBB.
- /// Currently, only one value can leave the interval.
- void leaveIntvAtTop(MachineBasicBlock &MBB);
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
/// closeIntv - Indicate that we are done editing the currently open
/// LiveInterval, and ranges can be trimmed.
void closeIntv();
- /// rewrite - after all the new live ranges have been created, rewrite
- /// instructions using curli to use the new intervals.
- void rewrite();
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ void finish();
- // ===--- High level methods ---===
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
- /// splitAroundLoop - Split curli into a separate live interval inside
- /// the loop. Return true if curli has been completely replaced, false if
- /// curli is still intact, and needs to be spilled or split further.
- bool splitAroundLoop(const MachineLoop*);
+ // ===--- High level methods ---===
- /// splitSingleBlocks - Split curli into a separate live interval inside each
- /// basic block in Blocks. Return true if curli has been completely replaced,
- /// false if curli is still intact, and needs to be spilled or split further.
- bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+ /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+ /// basic block in Blocks.
+ void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
- /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
- /// true if curli has been completely replaced, false if curli is still
- /// intact, and needs to be spilled or split further.
- bool splitInsideBlock(const MachineBasicBlock *);
+ /// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+ void splitInsideBlock(const MachineBasicBlock *);
};
}
diff --git a/contrib/llvm/lib/CodeGen/Splitter.cpp b/contrib/llvm/lib/CodeGen/Splitter.cpp
index 38f3b1f..08aee82 100644
--- a/contrib/llvm/lib/CodeGen/Splitter.cpp
+++ b/contrib/llvm/lib/CodeGen/Splitter.cpp
@@ -29,8 +29,14 @@
using namespace llvm;
char LoopSplitter::ID = 0;
-INITIALIZE_PASS(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false);
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false)
namespace llvm {
@@ -140,7 +146,6 @@ namespace llvm {
VNInfo *newVal = getNewVNI(preHeaderRange->valno);
newVal->def = copyDefIdx;
newVal->setCopy(copy);
- newVal->setIsDefAccurate(true);
li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
getNewLI()->addRange(LiveRange(copyDefIdx,
@@ -174,13 +179,13 @@ namespace llvm {
// Blow away output range definition.
outRange->valno->def = ls.lis->getInvalidIndex();
- outRange->valno->setIsDefAccurate(false);
li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+ SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+ assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
VNInfo *newVal =
- getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
- true),
- 0, false, ls.lis->getVNInfoAllocator());
+ getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
copyDefIdx, newVal));
@@ -514,8 +519,10 @@ namespace llvm {
if (!insertRange)
continue;
- VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader),
- 0, false, lis->getVNInfoAllocator());
+ SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
lis->getMBBEndIdx(preHeader),
newVal));
@@ -612,8 +619,11 @@ namespace llvm {
lis->getMBBEndIdx(splitBlock), true);
}
} else if (intersects) {
- VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock),
- 0, false, lis->getVNInfoAllocator());
+ SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+ lis->getVNInfoAllocator());
li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
lis->getMBBEndIdx(splitBlock),
newVal));
diff --git a/contrib/llvm/lib/CodeGen/Splitter.h b/contrib/llvm/lib/CodeGen/Splitter.h
index a726a7b..9fb1b8b 100644
--- a/contrib/llvm/lib/CodeGen/Splitter.h
+++ b/contrib/llvm/lib/CodeGen/Splitter.h
@@ -36,7 +36,9 @@ namespace llvm {
public:
static char ID;
- LoopSplitter() : MachineFunctionPass(ID) {}
+ LoopSplitter() : MachineFunctionPass(ID) {
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index 9f51778..fcaee42 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Attributes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -45,6 +46,8 @@ namespace {
Function *F;
Module *M;
+ DominatorTree* DT;
+
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
///
@@ -62,9 +65,17 @@ namespace {
bool RequiresStackProtector() const;
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), TLI(0) {}
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
StackProtector(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
virtual bool runOnFunction(Function &Fn);
};
@@ -72,7 +83,7 @@ namespace {
char StackProtector::ID = 0;
INITIALIZE_PASS(StackProtector, "stack-protector",
- "Insert stack protectors", false, false);
+ "Insert stack protectors", false, false)
FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
return new StackProtector(tli);
@@ -81,6 +92,7 @@ FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
@@ -135,6 +147,7 @@ bool StackProtector::RequiresStackProtector() const {
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
AllocaInst *AI = 0; // Place on stack that stores the stack guard.
Value *StackGuardVar = 0; // The stack guard variable.
@@ -178,6 +191,8 @@ bool StackProtector::InsertStackProtectors() {
// Create the basic block to jump to when the guard check fails.
FailBB = CreateFailBB();
+ if (DT)
+ FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
}
// For each block with a return instruction, convert this:
@@ -204,6 +219,10 @@ bool StackProtector::InsertStackProtectors() {
// Split the basic block before the return instruction.
BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+ if (DT) {
+ DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
+ FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+ }
// Remove default branch instruction to the new BB.
BB->getTerminator()->eraseFromParent();
@@ -223,6 +242,9 @@ bool StackProtector::InsertStackProtectors() {
// statements in the function.
if (!FailBB) return false;
+ if (DT)
+ DT->addNewBlock(FailBB, FailBBDom);
+
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 8d57ae9..01f5b56 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,13 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -145,8 +149,14 @@ namespace {
char StackSlotColoring::ID = 0;
-INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
- "Stack Slot Coloring", false, false);
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
return new StackSlotColoring(RegColor);
@@ -208,7 +218,7 @@ void StackSlotColoring::InitializeSlots() {
for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
LiveInterval &li = i->second;
DEBUG(li.dump());
- int FI = li.getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
SSIntervals.push_back(&li);
@@ -251,7 +261,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
if (!UsedColors[SS] || li->weight < 20)
// If the weight is < 20, i.e. two references in a loop with depth 1,
// don't bother with it.
@@ -340,7 +350,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Record the assignment.
Assignments[Color].push_back(li);
- int FI = li->getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -369,7 +379,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
@@ -382,7 +392,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
@@ -636,7 +646,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
} else {
SmallVector<MachineInstr*, 4> NewMIs;
bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- Success = Success; // Silence compiler warning.
+ (void)Success; // Silence compiler warning.
assert(Success && "Failed to unfold!");
MachineInstr *NewMI = NewMIs[0];
MBB->insert(MI, NewMI);
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
index 894dbfa..ec7829e 100644
--- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -1,4 +1,4 @@
-//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,1039 +7,823 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass eliminates machine instruction PHI nodes by inserting copy
-// instructions, using an intelligent copy-folding technique based on
-// dominator information. This is technique is derived from:
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
//
// Budimlic, et al. Fast copy coalescing and live-range identification.
// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
// PLDI '02. ACM, New York, NY, 25-32.
-// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
- struct StrongPHIElimination : public MachineFunctionPass {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(ID) {}
-
- // Waiting stores, for each MBB, the set of copies that need to
- // be inserted into that MBB
- DenseMap<MachineBasicBlock*,
- std::multimap<unsigned, unsigned> > Waiting;
-
- // Stacks holds the renaming stack for each register
- std::map<unsigned, std::vector<unsigned> > Stacks;
-
- // Registers in UsedByAnother are PHI nodes that are themselves
- // used as operands to another PHI node
- std::set<unsigned> UsedByAnother;
-
- // RenameSets are the is a map from a PHI-defined register
- // to the input registers to be coalesced along with the
- // predecessor block for those input registers.
- std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
-
- // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
- // eliminating, indexed by the register defined by that phi.
- std::map<unsigned, unsigned> PhiValueNumber;
-
- // Store the DFS-in number of each block
- DenseMap<MachineBasicBlock*, unsigned> preorder;
-
- // Store the DFS-out number of each block
- DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
-
- bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
-
- // TODO: Actually make this true.
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<RegisterCoalescer>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual void releaseMemory() {
- preorder.clear();
- maxpreorder.clear();
-
- Waiting.clear();
- Stacks.clear();
- UsedByAnother.clear();
- RenameSets.clear();
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
}
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
private:
-
- /// DomForestNode - Represents a node in the "dominator forest". This is
- /// a forest in which the nodes represent registers and the edges
- /// represent a dominance relation in the block defining those registers.
- struct DomForestNode {
- private:
- // Store references to our children
- std::vector<DomForestNode*> children;
- // The register we represent
- unsigned reg;
-
- // Add another node as our child
- void addChild(DomForestNode* DFN) { children.push_back(DFN); }
-
- public:
- typedef std::vector<DomForestNode*>::iterator iterator;
-
- // Create a DomForestNode by providing the register it represents, and
- // the node to be its parent. The virtual root node has register 0
- // and a null parent.
- DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
- if (parent)
- parent->addChild(this);
- }
-
- ~DomForestNode() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
- }
-
- /// getReg - Return the regiser that this node represents
- inline unsigned getReg() { return reg; }
-
- // Provide iterator access to our children
- inline DomForestNode::iterator begin() { return children.begin(); }
- inline DomForestNode::iterator end() { return children.end(); }
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+ /// pointer. One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
};
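A hypothetical sketch of how the two stolen bits can be set and tested through the PointerIntPair (the actual isolateReg/isolatePHI bodies appear later in this file; Reg is an illustrative virtual register):

  Node *N = RegNodeMap[Reg];
  N->parent.setInt(N->parent.getInt() | Node::kRegisterIsolatedFlag);
  bool RegIsolated = N->parent.getInt() & Node::kRegisterIsolatedFlag;
  (void)RegIsolated;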
-
- void computeDFS(MachineFunction& MF);
- void processBlock(MachineBasicBlock* MBB);
-
- std::vector<DomForestNode*> computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& instrs,
- MachineRegisterInfo& MRI);
- void processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals);
- void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
- void InsertCopies(MachineDomTreeNode* MBB,
- SmallPtrSet<MachineBasicBlock*, 16>& v);
- bool mergeLiveIntervals(unsigned primary, unsigned secondary);
- };
-}
-char StrongPHIElimination::ID = 0;
-INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently", false, false);
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
-char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
-/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
-/// of the given MachineFunction. These numbers are then used in other parts
-/// of the PHI elimination process.
-void StrongPHIElimination::computeDFS(MachineFunction& MF) {
- SmallPtrSet<MachineDomTreeNode*, 8> frontier;
- SmallPtrSet<MachineDomTreeNode*, 8> visited;
-
- unsigned time = 0;
-
- MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
-
- MachineDomTreeNode* node = DT.getRootNode();
-
- std::vector<MachineDomTreeNode*> worklist;
- worklist.push_back(node);
-
- while (!worklist.empty()) {
- MachineDomTreeNode* currNode = worklist.back();
-
- if (!frontier.count(currNode)) {
- frontier.insert(currNode);
- ++time;
- preorder.insert(std::make_pair(currNode->getBlock(), time));
- }
-
- bool inserted = false;
- for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
- I != E; ++I)
- if (!frontier.count(*I) && !visited.count(*I)) {
- worklist.push_back(*I);
- inserted = true;
- break;
- }
-
- if (!inserted) {
- frontier.erase(currNode);
- visited.insert(currNode);
- maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
-
- worklist.pop_back();
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a MachineInstr* and a virtual register, which
+ // is the operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
}
- }
-}
-namespace {
+ LiveIntervals *LI;
+ };
+} // namespace
-/// PreorderSorter - a helper class that is used to sort registers
-/// according to the preorder number of their defining blocks
-class PreorderSorter {
-private:
- DenseMap<MachineBasicBlock*, unsigned>& preorder;
- MachineRegisterInfo& MRI;
-
-public:
- PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
- MachineRegisterInfo& M) : preorder(p), MRI(M) { }
-
- bool operator()(unsigned A, unsigned B) {
- if (A == B)
- return false;
-
- MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
- MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
-
- if (preorder[ABlock] < preorder[BBlock])
- return true;
- else if (preorder[ABlock] > preorder[BBlock])
- return false;
-
- return false;
- }
-};
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
-/// computeDomForest - compute the subforest of the DomTree corresponding
-/// to the defining blocks of the registers in question
-std::vector<StrongPHIElimination::DomForestNode*>
-StrongPHIElimination::computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& regs,
- MachineRegisterInfo& MRI) {
- // Begin by creating a virtual root node, since the actual results
- // may well be a forest. Assume this node has maximum DFS-out number.
- DomForestNode* VirtualRoot = new DomForestNode(0, 0);
- maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
-
- // Populate a worklist with the registers
- std::vector<unsigned> worklist;
- worklist.reserve(regs.size());
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
- E = regs.end(); I != E; ++I)
- worklist.push_back(I->first);
-
- // Sort the registers by the DFS-in number of their defining block
- PreorderSorter PS(preorder, MRI);
- std::sort(worklist.begin(), worklist.end(), PS);
-
- // Create a "current parent" stack, and put the virtual root on top of it
- DomForestNode* CurrentParent = VirtualRoot;
- std::vector<DomForestNode*> stack;
- stack.push_back(VirtualRoot);
-
- // Iterate over all the registers in the previously computed order
- for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
- I != E; ++I) {
- unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
- MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
-
- // If the DFS-in number of the register is greater than the DFS-out number
- // of the current parent, repeatedly pop the parent stack until it isn't.
- while (pre > maxpreorder[parentBlock]) {
- stack.pop_back();
- CurrentParent = stack.back();
-
- parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
}
-
- // Now that we've found the appropriate parent, create a DomForestNode for
- // this register and attach it to the forest
- DomForestNode* child = new DomForestNode(*I, CurrentParent);
-
- // Push this new node on the "current parent" stack
- stack.push_back(child);
- CurrentParent = child;
}
-
- // Return a vector containing the children of the virtual root node
- std::vector<DomForestNode*> ret;
- ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
- return ret;
+ return NULL;
}
-/// isLiveIn - helper method that determines, from a regno, if a register
-/// is live into a block
-static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- LiveInterval& I = LI.getOrCreateInterval(r);
- SlotIndex idx = LI.getMBBStartIdx(MBB);
- return I.liveAt(idx);
-}
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
-/// isLiveOut - help method that determines, from a regno, if a register is
-/// live out of a block.
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
- E = MBB->succ_end(); PI != E; ++PI)
- if (isLiveIn(r, *PI, LI))
- return true;
-
- return false;
-}
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
-/// interferes - checks for local interferences by scanning a block. The only
-/// trick parameter is 'mode' which tells it the relationship of the two
-/// registers. 0 - defined in the same block, 1 - first properly dominates
-/// second, 2 - second properly dominates first
-static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
- LiveIntervals& LV, unsigned mode) {
- MachineInstr* def = 0;
- MachineInstr* kill = 0;
-
- // The code is still in SSA form at this point, so there is only one
- // definition per VReg. Thus we can safely use MRI->getVRegDef().
- const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
-
- bool interference = false;
-
- // Wallk the block, checking for interferences
- for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
- MBI != MBE; ++MBI) {
- MachineInstr* curr = MBI;
-
- // Same defining block...
- if (mode == 0) {
- if (curr == MRI->getVRegDef(a)) {
- // If we find our first definition, save it
- if (!def) {
- def = curr;
- // If there's already an unkilled definition, then
- // this is an interference
- } else if (!kill) {
- interference = true;
- break;
- // If there's a definition followed by a KillInst, then
- // they can't interfere
- } else {
- interference = false;
- break;
- }
- // Symmetric with the above
- } else if (curr == MRI->getVRegDef(b)) {
- if (!def) {
- def = curr;
- } else if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- // Store KillInsts if they match up with the definition
- } else if (curr->killsRegister(a)) {
- if (def == MRI->getVRegDef(a)) {
- kill = curr;
- } else if (curr->killsRegister(b)) {
- if (def == MRI->getVRegDef(b)) {
- kill = curr;
- }
- }
- }
- // First properly dominates second...
- } else if (mode == 1) {
- if (curr == MRI->getVRegDef(b)) {
- // Definition of second without kill of first is an interference
- if (!kill) {
- interference = true;
- break;
- // Definition after a kill is a non-interference
- } else {
- interference = false;
- break;
- }
- // Save KillInsts of First
- } else if (curr->killsRegister(a)) {
- kill = curr;
- }
- // Symmetric with the above
- } else if (mode == 2) {
- if (curr == MRI->getVRegDef(a)) {
- if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- } else if (curr->killsRegister(b)) {
- kill = curr;
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
}
}
}
-
- return interference;
-}
-/// processBlock - Determine how to break up PHIs in the current block. Each
-/// PHI is broken up by some combination of renaming its operands and inserting
-/// copies. This method is responsible for determining which operands receive
-/// which treatment.
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
-
- // Holds names that have been added to a set in any PHI within this block
- // before the current one.
- std::set<unsigned> ProcessedNames;
-
- // Iterate over all the PHI nodes in this block
- MachineBasicBlock::iterator P = MBB->begin();
- while (P != MBB->end() && P->isPHI()) {
- unsigned DestReg = P->getOperand(0).getReg();
-
- // Don't both doing PHI elimination for dead PHI's.
- if (P->registerDefIsDead(DestReg)) {
- ++P;
- continue;
- }
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
- LiveInterval& PI = LI.getOrCreateInterval(DestReg);
- SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
- VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
- PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
-
- // PHIUnion is the set of incoming registers to the PHI node that
- // are going to be renames rather than having copies inserted. This set
- // is refinded over the course of this function. UnionedBlocks is the set
- // of corresponding MBBs.
- std::map<unsigned, MachineBasicBlock*> PHIUnion;
- SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
-
- // Iterate over the operands of the PHI node
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- unsigned SrcReg = P->getOperand(i-1).getReg();
-
- // Don't need to try to coalesce a register with itself.
- if (SrcReg == DestReg) {
- ProcessedNames.insert(SrcReg);
- continue;
- }
-
- // We don't need to insert copies for implicit_defs.
- MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
- if (DefMI->isImplicitDef())
- ProcessedNames.insert(SrcReg);
-
- // Check for trivial interferences via liveness information, allowing us
- // to avoid extra work later. Any registers that interfere cannot both
- // be in the renaming set, so choose one and add copies for it instead.
- // The conditions are:
- // 1) if the operand is live into the PHI node's block OR
- // 2) if the PHI node is live out of the operand's defining block OR
- // 3) if the operand is itself a PHI node and the original PHI is
- // live into the operand's defining block OR
- // 4) if the operand is already being renamed for another PHI node
- // in this block OR
- // 5) if any two operands are defined in the same block, insert copies
- // for one of them
- if (isLiveIn(SrcReg, P->getParent(), LI) ||
- isLiveOut(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ||
- ( MRI.getVRegDef(SrcReg)->isPHI() &&
- isLiveIn(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
- ProcessedNames.count(SrcReg) ||
- UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
-
- // Add a copy for the selected register
- MachineBasicBlock* From = P->getOperand(i).getMBB();
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
- } else {
- // Otherwise, add it to the renaming set
- PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
- UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
- }
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
}
-
- // Compute the dominator forest for the renaming set. This is a forest
- // where the nodes are the registers and the edges represent dominance
- // relations between the defining blocks of the registers
- std::vector<StrongPHIElimination::DomForestNode*> DF =
- computeDomForest(PHIUnion, MRI);
-
- // Walk DomForest to resolve interferences at an inter-block level. This
- // will remove registers from the renaming set (and insert copies for them)
- // if interferences are found.
- std::vector<std::pair<unsigned, unsigned> > localInterferences;
- processPHIUnion(P, PHIUnion, DF, localInterferences);
-
- // If one of the inputs is defined in the same block as the current PHI
- // then we need to check for a local interference between that input and
- // the PHI.
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
- localInterferences.push_back(std::make_pair(I->first,
- P->getOperand(0).getReg()));
-
- // The dominator forest walk may have returned some register pairs whose
- // interference cannot be determined from dominator analysis. We now
- // examine these pairs for local interferences.
- for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
- localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
- std::pair<unsigned, unsigned> p = *I;
-
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-
- // Determine the block we need to scan and the relationship between
- // the two registers
- MachineBasicBlock* scan = 0;
- unsigned mode = 0;
- if (MRI.getVRegDef(p.first)->getParent() ==
- MRI.getVRegDef(p.second)->getParent()) {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 0; // Same block
- } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
- MRI.getVRegDef(p.second)->getParent())) {
- scan = MRI.getVRegDef(p.second)->getParent();
- mode = 1; // First dominates second
- } else {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 2; // Second dominates first
- }
-
- // If there's an interference, we need to insert copies
- if (interferes(p.first, p.second, scan, LI, mode)) {
- // Insert copies for First
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- if (P->getOperand(i-1).getReg() == p.first) {
- unsigned SrcReg = p.first;
- MachineBasicBlock* From = P->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg,
- P->getOperand(0).getReg()));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+ // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
}
}
-
- // Add the renaming set for this PHI node to our overall renaming information
- for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
- QE = PHIUnion.end(); QI != QE; ++QI) {
- DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
- << P->getOperand(0).getReg() << "\n");
- }
-
- RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
-
- // Remember which registers are already renamed, so that we don't try to
- // rename them for another PHI node in this block
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- ProcessedNames.insert(I->first);
-
- ++P;
}
-}
-/// processPHIUnion - Take a set of candidate registers to be coalesced when
-/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
-/// that are known to interfere, and flag others that need to be checked for
-/// local interferences.
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals) {
-
- std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
- SmallPtrSet<DomForestNode*, 4> visited;
-
- // Code is still in SSA form, so we can use MRI::getVRegDef()
- MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- unsigned DestReg = Inst->getOperand(0).getReg();
-
- // DF walk on the DomForest
- while (!worklist.empty()) {
- DomForestNode* DFNode = worklist.back();
-
- visited.insert(DFNode);
-
- bool inserted = false;
- for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
- CI != CE; ++CI) {
- DomForestNode* child = *CI;
-
- // If the current node is live-out of the defining block of one of its
- // children, insert a copy for it. NOTE: The paper actually calls for
- // a more elaborate heuristic for determining whether to insert copies
- // for the child or the parent. In the interest of simplicity, we're
- // just always choosing the parent.
- if (isLiveOut(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI)) {
- // Insert copies for parent
- for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
- if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
- unsigned SrcReg = DFNode->getReg();
- MachineBasicBlock* From = Inst->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
-
- // If a node is live-in to the defining block of one of its children, but
- // not live-out, then we need to scan that block for local interferences.
- } else if (isLiveIn(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI) ||
- MRI.getVRegDef(DFNode->getReg())->getParent() ==
- MRI.getVRegDef(child->getReg())->getParent()) {
- // Add (p, c) to possible local interferences
- locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
}
-
- if (!visited.count(child)) {
- worklist.push_back(child);
- inserted = true;
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
}
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
}
-
- if (!inserted) worklist.pop_back();
}
-}
-/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
-/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
-/// problems.
-///
-/// Based on "Practical Improvements to the Construction and Destruction
-/// of Static Single Assignment Form" by Briggs, et al.
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
- std::set<unsigned>& pushed) {
- // FIXME: This function needs to update LiveIntervals
- std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
-
- std::multimap<unsigned, unsigned> worklist;
- std::map<unsigned, unsigned> map;
-
- // Setup worklist of initial copies
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; ) {
- map.insert(std::make_pair(I->first, I->first));
- map.insert(std::make_pair(I->second, I->second));
-
- if (!UsedByAnother.count(I->second)) {
- worklist.insert(*I);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
- } else {
- ++I;
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
}
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
}
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineFunction* MF = MBB->getParent();
- MachineRegisterInfo& MRI = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
-
- // Iterate over the worklist, inserting copies
- while (!worklist.empty() || !copy_set.empty()) {
- while (!worklist.empty()) {
- std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
- std::pair<unsigned, unsigned> curr = *WI;
- worklist.erase(WI);
-
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
-
- if (isLiveOut(curr.second, MBB, LI)) {
- // Create a temporary
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
-
- // Insert copy from curr.second to a temporary at
- // the Phi defining curr.second
- MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
- BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
- t).addReg(curr.second);
- DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
- << "\n");
-
- // Push temporary on Stacks
- Stacks[curr.second].push_back(t);
-
- // Insert curr.second in pushed
- pushed.insert(curr.second);
-
- // Create a live interval for this temporary
- InsertedPHIDests.push_back(std::make_pair(t, --PI));
- }
-
- // Insert copy from map[curr.first] to curr.second
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
- map[curr.first] = curr.second;
- DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
- << curr.second << "\n");
-
- // Push this copy onto InsertedPHICopies so we can
- // update LiveIntervals with it.
- MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
-
- // If curr.first is a destination in copy_set...
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; )
- if (curr.first == I->second) {
- std::pair<unsigned, unsigned> temp = *I;
- worklist.insert(temp);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
-
- break;
- } else {
- ++I;
- }
- }
-
- if (!copy_set.empty()) {
- std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
- std::pair<unsigned, unsigned> curr = *CI;
- worklist.insert(curr);
- copy_set.erase(CI);
-
- LiveInterval& I = LI.getInterval(curr.second);
- MachineBasicBlock::iterator term = MBB->getFirstTerminator();
- SlotIndex endIdx = SlotIndex();
- if (term != MBB->end())
- endIdx = LI.getInstructionIndex(term);
- else
- endIdx = LI.getMBBEndIdx(MBB);
-
- if (I.liveAt(endIdx)) {
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(curr.first);
-
- // Insert a copy from dest to a new temporary t at the end of b
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), t).addReg(curr.second);
- map[curr.second] = t;
-
- MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(t, --TI));
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
}
}
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
}
-
- // Renumber the instructions so that we can perform the index computations
- // needed to create new live intervals.
- LI.renumber();
-
- // For copies that we inserted at the ends of predecessors, we construct
- // live intervals. This is pretty easy, since we know that the destination
- // register cannot have be in live at that point previously. We just have
- // to make sure that, for registers that serve as inputs to more than one
- // PHI, we don't create multiple overlapping live intervals.
- std::set<unsigned> RegHandled;
- for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
- InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
- if (RegHandled.insert(I->first).second) {
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx = LI.getInstructionIndex(I->second);
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
- true);
-
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
- R.valno->setCopy(I->second);
- R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
- }
+
+ LI->renumber();
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ if (RegNodeMap.count(Reg))
+ return;
+ RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
}
+
+ return Parent;
}
-/// InsertCopies - insert copies into MBB and all of its successors
-void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
- SmallPtrSet<MachineBasicBlock*, 16>& visited) {
- MachineBasicBlock* MBB = MDTN->getBlock();
- visited.insert(MBB);
-
- std::set<unsigned> pushed;
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- // Rewrite register uses from Stacks
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- if (I->isPHI())
- continue;
-
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- if (I->getOperand(i).isReg() &&
- Stacks[I->getOperand(i).getReg()].size()) {
- // Remove the live range for the old vreg.
- LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
- LiveInterval::iterator OldLR =
- OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
- if (OldLR != OldInt.end())
- OldInt.removeRange(*OldLR, true);
-
- // Change the register
- I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
-
- // Add a live range for the new vreg
- LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
- VNInfo* FirstVN = *Int.vni_begin();
- FirstVN->setHasPHIKill(false);
- LiveRange LR (LI.getMBBStartIdx(I->getParent()),
- LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
- FirstVN);
-
- Int.addRange(LR);
- }
- }
-
- // Schedule the copies for this block
- ScheduleCopies(MBB, pushed);
-
- // Recur down the dominator tree.
- for (MachineDomTreeNode::iterator I = MDTN->begin(),
- E = MDTN->end(); I != E; ++I)
- if (!visited.count((*I)->getBlock()))
- InsertCopies(*I, visited);
-
- // As we exit this block, pop the names we pushed while processing it
- for (std::set<unsigned>::iterator I = pushed.begin(),
- E = pushed.end(); I != E; ++I)
- Stacks[*I].pop_back();
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
}
-bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
- unsigned secondary) {
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- LiveInterval& LHS = LI.getOrCreateInterval(primary);
- LiveInterval& RHS = LI.getOrCreateInterval(secondary);
-
- LI.renumber();
-
- DenseMap<VNInfo*, VNInfo*> VNMap;
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
-
- SlotIndex Start = R.start;
- SlotIndex End = R.end;
- if (LHS.getLiveRangeContaining(Start))
- return false;
-
- if (LHS.getLiveRangeContaining(End))
- return false;
-
- LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
- if (RI != LHS.end() && RI->start < End)
- return false;
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
}
-
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
- VNInfo* OldVN = R.valno;
- VNInfo*& NewVN = VNMap[OldVN];
- if (!NewVN) {
- NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
- }
-
- LiveRange LR (R.start, R.end, NewVN);
- LHS.addRange(LR);
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
}
-
- LI.removeInterval(RHS.reg);
-
- return true;
+ return 0;
}
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
-
- // Compute DFS numbers of each block
- computeDFS(Fn);
-
- // Determine which phi node operands need copies
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
- if (!I->empty() && I->begin()->isPHI())
- processBlock(I);
-
- // Break interferences where two different phis want to coalesce
- // in the same register.
- std::set<unsigned> seen;
- typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
- RenameSetType;
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I) {
- for (std::map<unsigned, MachineBasicBlock*>::iterator
- OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
- if (!seen.count(OI->first)) {
- seen.insert(OI->first);
- ++OI;
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
} else {
- Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
- unsigned reg = OI->first;
- ++OI;
- I->second.erase(reg);
- DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
- << "\n");
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
}
}
}
-
- // Insert copies
- // FIXME: This process should probably preserve LiveIntervals
- SmallPtrSet<MachineBasicBlock*, 16> visited;
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
- InsertCopies(MDT.getRootNode(), visited);
-
- // Perform renaming
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I)
- while (I->second.size()) {
- std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
-
- DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
-
- if (SI->first != I->first) {
- if (mergeLiveIntervals(I->first, SI->first)) {
- Fn.getRegInfo().replaceRegWith(SI->first, I->first);
-
- if (RenameSets.count(SI->first)) {
- I->second.insert(RenameSets[SI->first].begin(),
- RenameSets[SI->first].end());
- RenameSets.erase(SI->first);
- }
- } else {
- // Insert a last-minute copy if a conflict was detected.
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
-
- LI.renumber();
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx =
- LI.getInstructionIndex(--SI->second->getFirstTerminator());
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(SI->second).getNextSlot(), true);
-
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
- --SI->second->getFirstTerminator());
- R.valno->setCopy(--SI->second->getFirstTerminator());
- R.valno->def = instrIdx.getDefIndex();
-
- DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
- << I->first << "\n");
- }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+ // is necessary because the uses of a PHI's operands are logically contained in
+ // the predecessor block. The def of a PHI's destination register is processed
+ // along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
}
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- const LiveRange* LR =
- Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
- LR->valno->setHasPHIKill(true);
-
- I->second.erase(SI->first);
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
}
-
- // Remove PHIs
- std::vector<MachineInstr*> phis;
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
- BI != BE; ++BI)
- if (BI->isPHI())
- phis.push_back(BI);
}
-
- for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
- I != E; ) {
- MachineInstr* PInstr = *(I++);
-
- // If this is a dead PHI node, then remove it from LiveIntervals.
- unsigned DestReg = PInstr->getOperand(0).getReg();
- LiveInterval& PI = LI.getInterval(DestReg);
- if (PInstr->registerDefIsDead(DestReg)) {
- if (PI.containsOneValue()) {
- LI.removeInterval(DestReg);
+}
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+ assert(SrcVNI);
+ SrcVNI->setHasPHIKill(true);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
} else {
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- PI.removeRange(*PI.getLiveRangeContaining(idx), true);
- }
- } else {
- // Trim live intervals of input registers. They are no longer live into
- // this block if they died after the PHI. If they lived after it, don't
- // trim them because they might have other legitimate uses.
- for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
- unsigned reg = PInstr->getOperand(i).getReg();
-
- MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
- LiveInterval& InputI = LI.getInterval(reg);
- if (MBB != PInstr->getParent() &&
- InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
- InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
- InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
- LI.getInstructionIndex(PInstr),
- true);
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
}
-
- // If the PHI is not dead, then the valno defined by the PHI
- // now has an unknown def.
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- const LiveRange* PLR = PI.getLiveRangeContaining(idx);
- PLR->valno->setIsPHIDef(true);
- LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
- PLR->start, PLR->valno);
- PI.addRange(R);
+
+ if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+ InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
}
-
- LI.RemoveMachineInstrFromMaps(PInstr);
- PInstr->eraseFromParent();
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
}
-
- LI.renumber();
-
- return true;
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->setIsPHIDef(true);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getDefIndex(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ CopyInstr,
+ LI->getVNInfoAllocator());
+ CopyVNI->setIsPHIDef(true);
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getDefIndex(),
+ CopyVNI));
+
+ // Adjust DestReg's live interval to account for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getDefIndex();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
+ }
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
}
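
For illustration only (not part of this patch): the doc comment above SplitInterferencesForBasicBlock describes testing interferences along a chain def(a_1) dom ... dom def(a_n) with O(n) pairwise checks, each new def being compared only against the current dominating value of its color. A minimal standalone C++ sketch of that linear case, with hypothetical register numbers, colors, and live ranges:

#include <cstdio>
#include <map>
#include <vector>

// Hypothetical standalone sketch; register numbers, colors, and ranges are
// made up and do not come from the pass above.
struct Value {
  unsigned reg;       // virtual register number
  unsigned color;     // congruence class
  int def, lastUse;   // live range is the half-open interval [def, lastUse)
};

static bool liveAt(const Value &V, int Point) {
  return Point >= V.def && Point < V.lastUse;
}

int main() {
  // Three values of one color, listed in dominance (program) order.
  std::vector<Value> Defs = {
    {1, 7, 0, 10},    // %1 live over [0,10)
    {2, 7, 4, 6},     // %2 live over [4,6): %1 is live at its def -> interference
    {3, 7, 12, 15},   // %3 live over [12,15): no interference
  };

  std::map<unsigned, const Value*> CurrentDominatingParent; // color -> value
  for (const Value &V : Defs) {
    const Value *Parent = CurrentDominatingParent[V.color];
    if (Parent && liveAt(*Parent, V.def)) {
      // The pass would call isolateReg(V.reg) here and keep Parent as the
      // current dominating value for this color.
      std::printf("%%%u interferes with %%%u: isolate %%%u\n",
                  V.reg, Parent->reg, V.reg);
      continue;
    }
    CurrentDominatingParent[V.color] = &V; // V becomes the dominating value
  }
  return 0;
}

The pass generalizes this to a whole dominator tree by treating ImmediateDominatingParent as an implicit stack and popping until the parent's definition actually dominates the current block, as the comment above explains.
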
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index a815b36..04d3d31 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -350,7 +350,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
@@ -459,15 +459,19 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
if (PreRegAlloc) {
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
+ if (TailBB->empty())
+ return false;
+ const TargetInstrDesc &TID = TailBB->back().getDesc();
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches and returns.
+ if (!TID.isIndirectBranch() && !TID.isReturn())
return false;
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
@@ -500,9 +504,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
if (!I->isPHI() && !I->isDebugValue())
InstrCount += 1;
}
- // Heuristically, don't tail-duplicate calls if it would expand code size,
- // as it's less likely to be worth the extra cost.
- if (InstrCount > 1 && HasCall)
+ // Don't tail-duplicate calls before register allocation. Calls present a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (InstrCount > 1 && (PreRegAlloc && HasCall))
return false;
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
index 6e4a0d8..15340a3 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -22,13 +22,18 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void
@@ -135,7 +140,7 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isPredicable())
return false;
-
+
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (TID.OpInfo[i].isPredicate()) {
MachineOperand &MO = MI->getOperand(i);
@@ -166,8 +171,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
-bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const {
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
@@ -252,9 +259,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, /*Offset=*/0,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -329,8 +336,13 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
// Avoid instructions obviously unsafe for remat.
- if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
- TID.mayStore())
+ if (TID.isNotDuplicable() || TID.mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
return false;
// Avoid instructions which load from potentially varying memory.
@@ -414,8 +426,24 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
- return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
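
For context (illustrative only, not part of this patch): a target that wants real pre-RA hazard detection would override CreateTargetHazardRecognizer and can honor the new -disable-sched-hazard flag through usePreRAHazardRecognizer(). In this sketch, MyTargetInstrInfo is a hypothetical target class and TM->getInstrItineraryData() is assumed to be available; ScoreboardHazardRecognizer is the same class used by the post-RA default above.

ScheduleHazardRecognizer *MyTargetInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  // Honor -disable-sched-hazard: fall back to the base implementation's
  // dummy recognizer, which never reports a hazard, when the flag is set.
  if (!usePreRAHazardRecognizer())
    return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);

  // Otherwise build a scoreboard recognizer from the target's itineraries,
  // mirroring the post-RA default above but tagged for pre-RA scheduling.
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
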
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f1e10ee..0b7bd98 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -29,10 +29,12 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
using namespace dwarf;
@@ -45,81 +47,81 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
TargetLoweringObjectFile::Initialize(Ctx, TM);
BSSSection =
- getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
TextSection =
- getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_EXECINSTR |
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
SectionKind::getText());
DataSection =
- getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getDataRel());
ReadOnlySection =
- getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
SectionKind::getReadOnly());
TLSDataSection =
- getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
SectionKind::getThreadData());
TLSBSSSection =
- getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
SectionKind::getThreadBSS());
DataRelSection =
- getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
DataRelLocalSection =
- getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRelLocal());
DataRelROSection =
- getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRel());
DataRelROLocalSection =
- getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRelLocal());
MergeableConst4Section =
- getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst4());
MergeableConst8Section =
- getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst8());
MergeableConst16Section =
- getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst16());
StaticCtorSection =
- getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
StaticDtorSection =
- getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
// Exception Handling Sections.
@@ -129,50 +131,50 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
LSDASection =
- getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
SectionKind::getReadOnly());
- EHFrameSection =
- getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
// Debug Info Sections.
DwarfAbbrevSection =
- getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfInfoSection =
- getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfLineSection =
- getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfFrameSection =
- getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfPubNamesSection =
- getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfPubTypesSection =
- getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfStrSection =
- getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfLocSection =
- getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfARangesSection =
- getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfRangesSection =
- getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfMacroInfoSection =
- getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
}
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+ return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
@@ -208,18 +210,18 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (Name == ".init_array")
- return MCSectionELF::SHT_INIT_ARRAY;
+ return ELF::SHT_INIT_ARRAY;
if (Name == ".fini_array")
- return MCSectionELF::SHT_FINI_ARRAY;
+ return ELF::SHT_FINI_ARRAY;
if (Name == ".preinit_array")
- return MCSectionELF::SHT_PREINIT_ARRAY;
+ return ELF::SHT_PREINIT_ARRAY;
if (K.isBSS() || K.isThreadBSS())
- return MCSectionELF::SHT_NOBITS;
+ return ELF::SHT_NOBITS;
- return MCSectionELF::SHT_PROGBITS;
+ return ELF::SHT_PROGBITS;
}
@@ -228,24 +230,24 @@ getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
if (!K.isMetadata())
- Flags |= MCSectionELF::SHF_ALLOC;
+ Flags |= ELF::SHF_ALLOC;
if (K.isText())
- Flags |= MCSectionELF::SHF_EXECINSTR;
+ Flags |= ELF::SHF_EXECINSTR;
if (K.isWriteable())
- Flags |= MCSectionELF::SHF_WRITE;
+ Flags |= ELF::SHF_WRITE;
if (K.isThreadLocal())
- Flags |= MCSectionELF::SHF_TLS;
+ Flags |= ELF::SHF_TLS;
// K.isMergeableConst() is left out to honour PR4650
if (K.isMergeableCString() || K.isMergeableConst4() ||
K.isMergeableConst8() || K.isMergeableConst16())
- Flags |= MCSectionELF::SHF_MERGE;
+ Flags |= ELF::SHF_MERGE;
if (K.isMergeableCString())
- Flags |= MCSectionELF::SHF_STRINGS;
+ Flags |= ELF::SHF_STRINGS;
return Flags;
}
@@ -261,23 +263,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
return getContext().getELFSection(SectionName,
getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind, true);
-}
-
-static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
- if (Kind.isText()) return ".gnu.linkonce.t.";
- if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
-
- if (Kind.isThreadData()) return ".gnu.linkonce.td.";
- if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
-
- if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
- if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
- if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
- if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
-
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".gnu.linkonce.d.rel.ro.";
+ getELFSectionFlags(Kind), Kind);
}
/// getSectionPrefixForGlobal - Return the section prefix name used by options
@@ -307,7 +293,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
bool EmitUniquedSection;
if (Kind.isText())
EmitUniquedSection = TM.getFunctionSections();
- else
+ else
EmitUniquedSection = TM.getDataSections();
// If this global is linkonce/weak and the target handles this by emitting it
@@ -315,19 +301,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
!Kind.isCommon() && !Kind.isBSS()) {
const char *Prefix;
- if (GV->isWeakForLinker())
- Prefix = getSectionPrefixForUniqueGlobal(Kind);
- else {
- assert(EmitUniquedSection);
- Prefix = getSectionPrefixForGlobal(Kind);
- }
+ Prefix = getSectionPrefixForGlobal(Kind);
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
return getContext().getELFSection(Name.str(),
getELFSectionType(Name.str(), Kind),
- getELFSectionFlags(Kind), Kind);
+ Flags, Kind, 0, Group);
}
if (Kind.isText()) return TextSection;
@@ -352,10 +340,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
std::string Name = SizeSpec + utostr(Align);
- return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_MERGE |
- MCSectionELF::SHF_STRINGS,
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
Kind);
}
@@ -450,7 +438,16 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
IsFunctionEHSymbolGlobal = true;
IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
-
+
+ Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+ if (T.getOS() == Triple::Darwin) {
+ unsigned MajNum = T.getDarwinMajorNumber();
+ if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger
+ CommDirectiveSupportsAlignment = false;
+ if (MajNum > 9) // 10.6 SnowLeopard
+ IsFunctionEHSymbolGlobal = false;
+ }
+
TargetLoweringObjectFile::Initialize(Ctx, TM);
TextSection // .text
@@ -469,20 +466,20 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
= getContext().getMachOSection("__DATA", "__thread_bss",
MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
SectionKind::getThreadBSS());
-
+
// TODO: Verify datarel below.
TLSTLVSection // .tlv
= getContext().getMachOSection("__DATA", "__thread_vars",
MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
SectionKind::getDataRel());
-
+
TLSThreadInitSection
= getContext().getMachOSection("__DATA", "__thread_init",
MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
SectionKind::getDataRel());
-
+
CStringSection // .cstring
- = getContext().getMachOSection("__TEXT", "__cstring",
+ = getContext().getMachOSection("__TEXT", "__cstring",
MCSectionMachO::S_CSTRING_LITERALS,
SectionKind::getMergeable1ByteCString());
UStringSection
@@ -493,7 +490,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSectionMachO::S_4BYTE_LITERALS,
SectionKind::getMergeableConst4());
EightByteConstantSection // .literal8
- = getContext().getMachOSection("__TEXT", "__literal8",
+ = getContext().getMachOSection("__TEXT", "__literal8",
MCSectionMachO::S_8BYTE_LITERALS,
SectionKind::getMergeableConst8());
@@ -517,14 +514,14 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
SectionKind::getText());
ConstTextCoalSection
- = getContext().getMachOSection("__TEXT", "__const_coal",
+ = getContext().getMachOSection("__TEXT", "__const_coal",
MCSectionMachO::S_COALESCED,
SectionKind::getReadOnly());
ConstDataSection // .const_data
= getContext().getMachOSection("__DATA", "__const", 0,
SectionKind::getReadOnlyWithRel());
DataCoalSection
- = getContext().getMachOSection("__DATA","__datacoal_nt",
+ = getContext().getMachOSection("__DATA","__datacoal_nt",
MCSectionMachO::S_COALESCED,
SectionKind::getDataRel());
DataCommonSection
@@ -534,7 +531,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
DataBSSSection
= getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
SectionKind::getBSS());
-
+
LazySymbolPointerSection
= getContext().getMachOSection("__DATA", "__la_symbol_ptr",
@@ -566,17 +563,9 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
// Exception Handling.
LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
- EHFrameSection =
- getContext().getMachOSection("__TEXT", "__eh_frame",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_NO_TOC |
- MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
- MCSectionMachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
-
// Debug Information.
DwarfAbbrevSection =
- getContext().getMachOSection("__DWARF", "__debug_abbrev",
+ getContext().getMachOSection("__DWARF", "__debug_abbrev",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
DwarfInfoSection =
@@ -623,10 +612,19 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
getContext().getMachOSection("__DWARF", "__debug_inlined",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
-
+
TLSExtraDataSection = TLSTLVSection;
}
+const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
+ return getContext().getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+}
+
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -665,7 +663,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
-
+
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
if (Kind.isThreadData()) return TLSDataSection;
@@ -685,7 +683,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isMergeable1ByteCString() &&
TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
-
+
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
@@ -721,7 +719,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// with the .zerofill directive (aka .lcomm).
if (Kind.isBSSLocal())
return DataBSSSection;
-
+
// Otherwise, just drop the variable in the normal data section.
return DataSection;
}
@@ -858,13 +856,6 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- EHFrameSection =
- getContext().getCOFFSection(".eh_frame",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-
// Debug info.
DwarfAbbrevSection =
getContext().getCOFFSection(".debug_abbrev",
@@ -928,6 +919,15 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
SectionKind::getMetadata());
}
+const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
+ return getContext().getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+
static unsigned
getCOFFSectionFlags(SectionKind K) {
unsigned Flags = 0;
@@ -938,6 +938,7 @@ getCOFFSectionFlags(SectionKind K) {
else if (K.isText())
Flags |=
COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_CNT_CODE;
else if (K.isBSS ())
Flags |=
@@ -967,12 +968,12 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
if (Kind.isText())
- return ".text$linkonce";
+ return ".text$";
if (Kind.isBSS ())
- return ".bss$linkonce";
+ return ".bss$";
if (Kind.isWriteable())
- return ".data$linkonce";
- return ".rdata$linkonce";
+ return ".data$";
+ return ".rdata$";
}
@@ -987,14 +988,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
- Name.append(Sym->getName().begin(), Sym->getName().end());
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
unsigned Characteristics = getCOFFSectionFlags(Kind);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
return getContext().getCOFFSection(Name.str(), Characteristics,
- COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
}
if (Kind.isText())
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 78989c5..b3120b8 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -110,7 +110,7 @@ namespace {
bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist);
+ unsigned RegA, unsigned RegB, unsigned Dist);
typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
@@ -138,7 +138,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -146,10 +148,7 @@ namespace {
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,8 +158,11 @@ namespace {
}
char TwoAddressInstructionPass::ID = 0;
-INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction",
- "Two-Address instruction pass", false, false);
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
@@ -548,8 +550,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
unsigned FromRegC = getMappedReg(regC, SrcRegMap);
unsigned ToRegB = getMappedReg(regB, DstRegMap);
unsigned ToRegC = getMappedReg(regC, DstRegMap);
- if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
- (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+ ((!FromRegC && !ToRegC) ||
+ regsAreCompatible(FromRegB, ToRegC, TRI) ||
regsAreCompatible(FromRegC, ToRegB, TRI)))
return true;
@@ -630,7 +633,8 @@ bool
TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist) {
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
if (NewMI) {
DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
@@ -650,6 +654,10 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
mi = NewMI;
nmi = llvm::next(mi);
}
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
return true;
}
@@ -740,7 +748,7 @@ static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayStore() || TID.isCall())
return false;
- if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -884,7 +892,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA)) {
// Try to convert it.
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
++NumConvertedTo3Addr;
return true; // Done with this instruction.
}
@@ -951,7 +959,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() != 0 &&
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
@@ -1013,8 +1021,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
<< MF.getFunction()->getName() << '\n');
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs;
- ReMatRegs.resize(MRI->getLastVirtReg()+1);
+ BitVector ReMatRegs(MRI->getNumVirtRegs());
typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
TiedOperandMap;
@@ -1143,7 +1150,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(regB);
+ ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
++NumReMats;
} else {
BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
@@ -1229,13 +1236,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
// Some remat'ed instructions are dead.
- int VReg = ReMatRegs.find_first();
- while (VReg != -1) {
+ for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->use_nodbg_empty(VReg)) {
MachineInstr *DefMI = MRI->getVRegDef(VReg);
DefMI->eraseFromParent();
}
- VReg = ReMatRegs.find_next(VReg);
}
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
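The ReMatRegs loop above, like the VirtRegMap hunks later in this commit, stops doing FirstVirtualRegister arithmetic and instead keys containers by a zero-based virtual-register index. A condensed sketch of that convention, assembled from this patch's own lines rather than any new API:

    // Sketch (condensed from this patch's hunks): containers are sized by
    // getNumVirtRegs() and keyed by a zero-based index instead of raw vreg
    // numbers offset by FirstVirtualRegister.
    static void eraseDeadRematDefs(MachineRegisterInfo *MRI,
                                   const BitVector &ReMatRegs) {
      // ReMatRegs was sized with BitVector(MRI->getNumVirtRegs()) and filled
      // with ReMatRegs.set(TargetRegisterInfo::virtReg2Index(Reg)).
      for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
        unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
        if (MRI->use_nodbg_empty(VReg))
          MRI->getVRegDef(VReg)->eraseFromParent();   // drop a dead remat def
      }
    }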
@@ -1346,7 +1352,6 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
continue;
// Insert a copy to replace the original.
- MachineBasicBlock::iterator InsertLoc = SomeMI;
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
@@ -1412,6 +1417,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
if (MI->getOperand(i).getSubReg() ||
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
@@ -1431,7 +1437,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
bool isKill = MI->getOperand(i).isKill();
if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
- !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+ !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+ MRI->getRegClass(SrcReg), SubIdx)) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add a copy if the source is live-in to the block. We don't want
// to end up with a partial-redef of a livein, e.g.
@@ -1460,7 +1468,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineBasicBlock::iterator InsertLoc = MI;
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+ .addReg(DstReg, RegState::Define, SubIdx)
.addReg(SrcReg, getKillRegState(isKill));
MI->getOperand(i).setReg(0);
if (LV && isKill)
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 6dd3333..48d8ab1 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,6 +26,7 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,16 +44,19 @@ namespace {
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(ID) {}
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
};
}
char UnreachableBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
- "Remove unreachable blocks from the CFG", false, false);
+ "Remove unreachable blocks from the CFG", false, false)
FunctionPass *llvm::createUnreachableBlockEliminationPass() {
return new UnreachableBlockElim();
@@ -106,7 +110,7 @@ namespace {
char UnreachableMachineBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
- "Remove unreachable machine basic blocks", false, false);
+ "Remove unreachable machine basic blocks", false, false)
char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
@@ -118,6 +122,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -179,6 +184,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (!preds.count(phi->getOperand(i).getMBB())) {
phi->RemoveOperand(i);
phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
}
if (phi->getNumOperands() == 3) {
@@ -188,6 +194,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
MachineInstr* temp = phi;
++phi;
temp->eraseFromParent();
+ ModifiedPHI = true;
if (Input != Output)
F.getRegInfo().replaceRegWith(Output, Input);
@@ -201,5 +208,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
F.RenumberBlocks();
- return DeadBlocks.size();
+ return (DeadBlocks.size() || ModifiedPHI);
}
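The new ModifiedPHI flag fixes the pass's reported result: runOnMachineFunction must return true whenever anything changed, not only when whole blocks were erased, otherwise later passes may run with stale information. A minimal, purely illustrative sketch of that reporting rule (names are made up, not from this patch):

    // Sketch: track every kind of mutation and OR them into the result.
    static bool pruneUnreachable(MachineFunction &F) {
      bool ModifiedPHI = false;     // set when PHI operands are removed/folded
      unsigned NumDeadBlocks = 0;   // counts erased blocks
      // ... walk blocks, prune PHI operands coming from unreachable
      //     predecessors (ModifiedPHI = true), erase dead blocks
      //     (++NumDeadBlocks), renumber ...
      return NumDeadBlocks != 0 || ModifiedPHI;
    }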
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 20ffcff..734b87e 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -48,7 +49,7 @@ STATISTIC(NumSpills , "Number of register spills");
char VirtRegMap::ID = 0;
-INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
@@ -74,8 +75,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
EmergencySpillSlots.clear();
SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
- TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
allocatableRCRegs.clear();
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
@@ -89,24 +89,37 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
}
void VirtRegMap::grow() {
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- Virt2PhysMap.grow(LastVirtReg);
- Virt2StackSlotMap.grow(LastVirtReg);
- Virt2ReMatIdMap.grow(LastVirtReg);
- Virt2SplitMap.grow(LastVirtReg);
- Virt2SplitKillMap.grow(LastVirtReg);
- ReMatMap.grow(LastVirtReg);
- ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2ReMatIdMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+ Virt2SplitKillMap.resize(NumRegs);
+ ReMatMap.resize(NumRegs);
+ ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ return SS;
}
unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
physReg = getPhys(physReg);
if (Hint.first == 0)
- return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ return (TargetRegisterInfo::isPhysicalRegister(physReg))
? physReg : 0;
return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
}
@@ -116,18 +129,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
- Virt2StackSlotMap[virtReg] = SS;
++NumSpills;
- return SS;
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
}
void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
@@ -160,14 +163,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- EmergencySpillSlots[RC] = SS;
- return SS;
+ return EmergencySpillSlots[RC] = createSpillSlot(RC);
}
void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
@@ -232,10 +228,11 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
UnusedRegs.resize(NumRegs);
BitVector Used(NumRegs);
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[i]);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[Reg]);
+ }
BitVector Allocatable = TRI->getAllocatableSet(*MF);
bool AnyUnused = false;
@@ -258,23 +255,97 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
return AnyUnused;
}
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getFunction()->getName() << '\n');
+
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+ MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = getPhys(VirtReg);
+ assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register.
+ if (MO.isUse() && MO.isKill() && !MO.isUndef())
+ SuperKills.push_back(PhysReg);
+
+ // We don't have to deal with sub-register defs because
+ // LiveIntervalAnalysis already added the necessary <imp-def>
+ // operands.
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ RemoveMachineInstrFromMaps(MI);
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
+
void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
OS << "********** REGISTER MAP **********\n";
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI.getRegClass(Reg)->getName() << "\n";
+ }
}
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
- OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
OS << '\n';
}
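The rewrite() method added above gives VirtRegMap a self-contained way to replace every virtual-register operand with its assigned physreg, fix up sub-register and kill flags, and delete identity copies. A hedged sketch of a caller; a real allocator would obtain VRM via getAnalysis<VirtRegMap>() and SlotIndexes via getAnalysisIfAvailable<SlotIndexes>(), none of which is shown in this patch:

    // Sketch: once every live vreg has an assignment in VRM, rewriting is one
    // call. Indexes may be null; passing it lets rewrite() also remove
    // deleted identity copies from the slot index maps.
    static void finishAllocation(VirtRegMap &VRM, SlotIndexes *Indexes) {
      // Precondition: VRM.getPhys(Reg) is valid for every vreg still used in
      // the function, e.g. via earlier VRM.assignVirt2Phys(Reg, PhysReg) calls.
      VRM.rewrite(Indexes);   // asserts if any vreg operand is still unmapped
    }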
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
index 8b6082d..ba50f4e 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.h
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -35,6 +35,7 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterInfo;
class raw_ostream;
+ class SlotIndexes;
class VirtRegMap : public MachineFunctionPass {
public:
@@ -80,7 +81,7 @@ namespace llvm {
/// Virt2SplitKillMap - This maps a split virtual register to its last use
/// (kill) index.
- IndexedMap<SlotIndex> Virt2SplitKillMap;
+ IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
/// ReMatMap - This is virtual register to re-materialized instruction
/// mapping. Each virtual register whose definition is going to be
@@ -134,6 +135,9 @@ namespace llvm {
/// UnusedRegs - A list of physical registers that have not been used.
BitVector UnusedRegs;
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
@@ -153,10 +157,13 @@ namespace llvm {
}
MachineFunction &getMachineFunction() const {
- assert(MF && "getMachineFunction called before runOnMAchineFunction");
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
return *MF;
}
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
void grow();
/// @brief returns true if the specified virtual register is
@@ -207,10 +214,19 @@ namespace llvm {
}
/// @brief returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) {
+ unsigned getPreSplitReg(unsigned virtReg) const {
return Virt2SplitMap[virtReg];
}
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
@@ -426,12 +442,12 @@ namespace llvm {
/// @brief Mark the specified register as being implicitly defined.
void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
}
/// @brief Returns true if the virtual register is implicitly defined.
bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
}
/// @brief Updates information about the specified virtual register's value
@@ -487,6 +503,13 @@ namespace llvm {
return 0;
}
+ /// rewrite - Rewrite all instructions in MF to use only physical registers
+ /// by mapping all virtual register operands to their assigned physical
+ /// registers.
+ ///
+ /// @param Indexes Optionally remove deleted instructions from indexes.
+ void rewrite(SlotIndexes *Indexes);
+
void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};
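getOriginal() is a one-lookup helper: Virt2SplitMap records the pre-split ancestor for every register produced by live-range splitting, so applying getOriginal() twice changes nothing. A tiny sketch of the documented idempotence (VRM and SplitReg are assumed to come from an allocator that performed splitting):

    // Sketch: an original register is its own original.
    static void checkOriginal(const VirtRegMap &VRM, unsigned SplitReg) {
      unsigned Orig = VRM.getOriginal(SplitReg);  // SplitReg itself if unsplit
      assert(VRM.getOriginal(Orig) == Orig && "getOriginal must be idempotent");
      (void)Orig;  // silence unused-variable warnings in release builds
    }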
diff --git a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
index 240d28c..458a213 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include <algorithm>
using namespace llvm;
STATISTIC(NumDSE , "Number of dead stores elided");
@@ -216,7 +216,8 @@ public:
<< SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
else
DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+ << (CanClobber ? " canclobber" : "") << "\n");
}
/// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -297,7 +298,7 @@ ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
const TargetLowering *TL = MF.getTarget().getTargetLowering();
if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on PIC16.
+ // Believe it or not, this is true on 16-bit targets like PIC16.
return InsertLoc;
const TargetRegisterClass *ptrRegClass =
@@ -462,25 +463,70 @@ static void findSinglePredSuccessor(MachineBasicBlock *MBB,
}
}
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg]) {
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- if (RegKills[*SR]) {
- KillOps[*SR]->setIsKill(false);
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and clear its kill marker and last kill operand entries.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+ MachineOperand *KillOp = KillOps[Reg];
+ KillOp->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOp->getReg();
+ if (!RegKills[KReg])
+ return;
+
+ assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ // If it's a def of a super-register, its other sub-registers are no
+ // longer killed as well.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+ assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+/// until it rewrites vreg2. At that points it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo* TRI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ return;
+ }
+ // No previous kill for this reg. Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+ ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
}
}
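ResurrectKill and its helper lean on an invariant kept by UpdateKills throughout the rewriter: RegKills[Reg] means Reg is currently marked killed earlier in the block, and KillOps[Reg] points at the exact operand carrying that kill flag (possibly an operand of a super-register). A hedged sketch of the recording side of that bookkeeping, mirroring what UpdateKills does when it sees a kill flag:

    // Sketch: record a kill so a later reuse of Reg can be "resurrected".
    static void recordKill(MachineOperand &MO, const TargetRegisterInfo *TRI,
                           BitVector &RegKills,
                           std::vector<MachineOperand*> &KillOps) {
      unsigned Reg = MO.getReg();
      RegKills.set(Reg);
      KillOps[Reg] = &MO;
      // Sub-registers die with their super-register; point them at the same
      // operand so ResurrectConfirmedKill can clear all of them together.
      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
        RegKills.set(*SR);
        KillOps[*SR] = &MO;
      }
    }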
@@ -502,15 +548,22 @@ static void InvalidateKills(MachineInstr &MI,
KillRegs->push_back(Reg);
assert(Reg < KillOps.size());
if (KillOps[Reg] == &MO) {
+ // This operand was the kill, now no longer.
KillOps[Reg] = NULL;
RegKills.reset(Reg);
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
if (RegKills[*SR]) {
+ assert(KillOps[*SR] == &MO && "bad subreg kill flags");
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
}
}
+ else {
+ // This operand may have reused a previously killed reg. Keep it live in
+ // case it continues to be used after erasing this instruction.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+ }
}
}
@@ -578,44 +631,8 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
if (Reg == 0)
continue;
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- // That can't be right. Register is killed but not re-defined and it's
- // being reused. Let's fix that.
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // Must be a def of a super-register. Its other sub-regsters are no
- // longer killed as well.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- } else {
- // Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
- KillOps[SReg]->setIsKill(false);
- unsigned KReg = KillOps[SReg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
- KillOps[*SSR] = NULL;
- RegKills.reset(*SSR);
- }
- }
- }
- }
+ // This operand may have reused a previously killed reg. Keep it live.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
if (MO.isKill()) {
RegKills.set(Reg);
@@ -770,7 +787,8 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
NotAvailable.insert(Reg);
else {
MBB.addLiveIn(Reg);
- InvalidateKill(Reg, TRI, RegKills, KillOps);
+ if (RegKills[Reg])
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
}
// Skip over the same register.
@@ -1056,6 +1074,7 @@ class LocalRewriter : public VirtRegRewriter {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
VirtRegMap *VRM;
+ LiveIntervals *LIs;
BitVector AllocatableRegs;
DenseMap<MachineInstr*, unsigned> DistanceMap;
DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
@@ -1068,6 +1087,11 @@ public:
LiveIntervals* LIs);
private:
+ void EraseInstr(MachineInstr *MI) {
+ VRM->RemoveMachineInstrFromMaps(MI);
+ LIs->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
bool OptimizeByUnfold2(unsigned VirtReg, int SS,
MachineBasicBlock::iterator &MII,
@@ -1110,6 +1134,12 @@ private:
bool InsertSpills(MachineInstr *MI);
+ void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps);
+
void RewriteMBB(LiveIntervals *LIs,
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps);
@@ -1117,17 +1147,18 @@ private:
}
bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* LIs) {
+ LiveIntervals* lis) {
MRI = &MF.getRegInfo();
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
VRM = &vrm;
+ LIs = lis;
AllocatableRegs = TRI->getAllocatableSet(MF);
DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
<< MF.getFunction()->getName() << "':\n");
DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
" reloads!) ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Spills - Keep track of which spilled values are available in physregs
// so that we can choose to reuse the physregs instead of emitting
@@ -1178,7 +1209,7 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
}
DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Mark unused spill slots.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1190,10 +1221,8 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
MFI->RemoveStackObject(SS);
for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
MachineInstr *DVMI = DbgValues[j];
- MachineBasicBlock *DVMBB = DVMI->getParent();
DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- VRM->RemoveMachineInstrFromMaps(DVMI);
- DVMBB->erase(DVMI);
+ EraseInstr(DVMI);
}
++NumDSS;
}
@@ -1273,8 +1302,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
VRM->transferRestorePts(&MI, NewMIs[0]);
MII = MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
++NumModRefUnfold;
// Unfold next instructions that fold the same SS.
@@ -1289,8 +1317,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
VRM->transferRestorePts(&NextMI, NewMIs[0]);
MBB->insert(NextMII, NewMIs[0]);
InvalidateKills(NextMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&NextMI);
- MBB->erase(&NextMI);
+ EraseInstr(&NextMI);
++NumModRefUnfold;
// Skip over dbg_value instructions.
while (NextMII != MBB->end() && NextMII->isDebugValue())
@@ -1417,8 +1444,7 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
MII = FoldedMI;
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
return true;
}
}
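Each of the VRM->RemoveMachineInstrFromMaps / MBB->erase pairs in this file is being funneled through the EraseInstr helper introduced above, which additionally keeps LiveIntervals in sync. A condensed sketch of the idiom as a free function (the member form in the patch is equivalent):

    // Sketch: deleting an instruction during rewriting must unhook it from
    // every analysis that may still hold a pointer to it.
    static void eraseAndUnmap(MachineInstr *MI, VirtRegMap &VRM,
                              LiveIntervals &LIs) {
      VRM.RemoveMachineInstrFromMaps(MI);  // forget spill/restore/fold records
      LIs.RemoveMachineInstrFromMaps(MI);  // forget its SlotIndex
      MI->eraseFromParent();               // finally unlink from the block
    }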
@@ -1524,14 +1550,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
// Delete all 3 old instructions.
InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(ReloadMI);
- MBB->erase(ReloadMI);
+ EraseInstr(ReloadMI);
InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DefMI);
- MBB->erase(DefMI);
+ EraseInstr(DefMI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
// If NewReg was previously holding value of some SS, it's now clobbered.
// This has to be done now because it's a physical register. When this
@@ -1574,8 +1597,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
bool CheckDef = PrevMII != MBB->begin();
if (CheckDef)
--PrevMII;
- VRM->RemoveMachineInstrFromMaps(LastStore);
- MBB->erase(LastStore);
+ EraseInstr(LastStore);
if (CheckDef) {
// Look at defs of killed registers on the store. Mark the defs
// as dead since the store has been deleted and they aren't
@@ -1586,8 +1608,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
- VRM->RemoveMachineInstrFromMaps(DeadDef);
- MBB->erase(DeadDef);
+ EraseInstr(DeadDef);
++NumDRM;
}
}
@@ -1612,10 +1633,18 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
/// effect and all of its defs are dead.
static bool isSafeToDelete(MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
TID.isCall() || TID.isBarrier() || TID.isReturn() ||
- TID.hasUnmodeledSideEffects())
+ MI.isLabel() || MI.isDebugValue() ||
+ MI.hasUnmodeledSideEffects())
return false;
+
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // leave it alone.
+ if (MI.isInlineAsm())
+ return false;
+
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.getReg())
@@ -1675,8 +1704,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills,
LastUD->setIsDead();
break;
}
- VRM->RemoveMachineInstrFromMaps(LastUDMI);
- MBB->erase(LastUDMI);
+ EraseInstr(LastUDMI);
} else {
LastUD->setIsKill();
RegKills.set(Reg);
@@ -1764,6 +1792,10 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
<< TRI->getName(InReg) << " for vreg"
<< VirtReg <<" instead of reloading into physreg "
<< TRI->getName(Phys) << '\n');
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+ // the kill flags for the current instruction after processing it.
+
++NumOmitted;
continue;
} else if (InReg && InReg != Phys) {
@@ -1828,7 +1860,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
return true;
}
-/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
/// true if spills were inserted.
bool LocalRewriter::InsertSpills(MachineInstr *MI) {
if (!VRM->isSpillPt(MI))
@@ -1856,6 +1888,349 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
}
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps) {
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MRI->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before so the scavenger knows it's "defined".
+ // FIXME: This is a horrible hack done by the register allocator to
+ // remat a definition with a virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+
+ // A partial def causes problems because the same operand both reads and
+ // writes the register. This rewriter is designed to rewrite uses and defs
+ // separately, so a partial def would already have been rewritten to a
+ // physreg by the time we get to processing defs.
+ // Add an implicit use operand to model the partial def.
+ if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+ MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+ VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+ MI.addOperand(MachineOperand::CreateReg(VirtReg,
+ false, // isDef
+ true)); // isImplicit
+ DEBUG(dbgs() << "Partial redef: " << MI);
+ }
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ unsigned VirtReg = MI.getOperand(i).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
+ if (VRM->isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+ if (MI.getOperand(i).isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+ if (VRM->isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MI.getOperand(i).isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MI.getOperand(i).isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+ // Note, this is for correctness reason. e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def. It will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled, you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and the two target registers would overlap.
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register file.
+ if (PhysReg && !AvoidReload && SubIdx) {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
+ if (MI.isInlineAsm()) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+ CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg << ": " << MOk << '\n');
+ break;
+ }
+ }
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to
+ // update the kill flags for the current instr after processing it.
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM->getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
+ unsigned DesignatedReg = VRM->getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.
+ GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+ MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ MRI->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ MRI->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+}
+
/// rewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avoid reloading vregs.
void
@@ -1880,9 +2255,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// ReMatDefs - These are rematerializable def MIs which are not deleted.
SmallSet<MachineInstr*, 4> ReMatDefs;
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
-
// Keep track of the registers we have already spilled in case there are
// multiple defs of the same register in MI.
SmallSet<unsigned, 8> SpilledMIRegs;
@@ -1918,323 +2290,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
ReuseInfo ReusedOperands(MI, TRI);
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
-
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore physregs for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit a implicit_def
- // before so scavenger knows it's "defined".
- // FIXME: This is a horrible hack done the by register allocator to
- // remat a definition with virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
- }
-
- // Process all of the spilled uses and all non spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
- // If this is an asm, and a PhysReg alias is used elsewhere as an
- // earlyclobber operand, we can't also use it as an input.
- if (MI.isInlineAsm()) {
- for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
- MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.isEarlyClobber() &&
- TRI->regsOverlap(MOk.getReg(), PhysReg)) {
- CanReuse = false;
- DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg << ": " << MOk << '\n');
- break;
- }
- }
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
- << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark is isKill if it's there no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to inserted a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(MII));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
+ ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
DEBUG(dbgs() << '\t' << MI);
@@ -2288,14 +2345,13 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
BackTracked = true;
} else {
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // Unset last kill since it's being reused.
- InvalidateKill(InReg, TRI, RegKills, KillOps);
+            // InvalidateKills resurrects any prior kill of the copy's source,
+            // allowing the source reg to be reused in place of the copy.
Spills.disallowClobberPhysReg(InReg);
}
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
goto ProcessNextInst;
}
@@ -2306,8 +2362,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII; // backtrack to the unfolded instruction.
BackTracked = true;
@@ -2343,8 +2398,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
MBB->insert(MII, NewStore);
VRM->addSpillSlotUse(SS, NewStore);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII;
--NextMII; // backtrack to the unfolded instruction.
@@ -2359,8 +2413,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// If we get here, the store is dead, nuke it now.
DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
+ EraseInstr(DeadStore);
if (!NewStore)
++NumDSE;
}
@@ -2437,8 +2490,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Last def is now dead.
TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
}
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
@@ -2514,8 +2566,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
++NumDCE;
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
UpdateKills(*LastStore, TRI, RegKills, KillOps);
goto ProcessNextInst;
@@ -2526,8 +2577,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Delete dead instructions without side effects.
if (!Erased && !BackTracked && isSafeToDelete(MI)) {
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
}
if (!Erased)
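
The hunks above repeatedly fold the pair VRM->RemoveMachineInstrFromMaps(&MI); MBB->erase(&MI); into a single EraseInstr(&MI) call. A minimal sketch of what such a LocalRewriter helper would contain, inferred only from the two calls it replaces (not taken from the patch; the real member may do more):

    void LocalRewriter::EraseInstr(MachineInstr *MI) {
      // Drop the instruction from the VirtRegMap's spill-slot/use bookkeeping...
      VRM->RemoveMachineInstrFromMaps(MI);
      // ...then unlink and delete it from the current basic block.
      MBB->erase(MI);
    }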
diff --git a/contrib/llvm/lib/CompilerDriver/Action.cpp b/contrib/llvm/lib/CompilerDriver/Action.cpp
index 0be8049..a8d625c 100644
--- a/contrib/llvm/lib/CompilerDriver/Action.cpp
+++ b/contrib/llvm/lib/CompilerDriver/Action.cpp
@@ -14,11 +14,12 @@
#include "llvm/CompilerDriver/Action.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
#include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/Main.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TimeValue.h"
#include <stdexcept>
#include <string>
@@ -28,7 +29,6 @@ using namespace llvmc;
namespace llvmc {
-extern int Main(int argc, char** argv);
extern const char* ProgramName;
}
@@ -53,15 +53,19 @@ namespace {
#endif
}
- int ExecuteProgram (const std::string& name,
- const StrVector& args) {
- sys::Path prog = sys::Program::FindProgramByName(name);
+ int ExecuteProgram (const std::string& name, const StrVector& args) {
+ sys::Path prog(name);
- if (prog.isEmpty()) {
- prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
- if (prog.isEmpty()) {
- PrintError("Can't find program '" + name + "'");
- return -1;
+ if (sys::path::is_relative(prog.str())) {
+ prog = PrependMainExecutablePath(name, ProgramName,
+ (void *)(intptr_t)&Main);
+
+ if (!prog.canExecute()) {
+ prog = sys::Program::FindProgramByName(name);
+ if (prog.isEmpty()) {
+ PrintError("Can't find program '" + name + "'");
+ return -1;
+ }
}
}
if (!prog.canExecute()) {
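
The rewritten ExecuteProgram above changes how tool names are resolved: a relative name is first looked up next to the driver executable and only then on PATH. A hypothetical resolution trace (the paths are illustrative only, not from the patch):

    // name = "llc" (relative)
    //   1. PrependMainExecutablePath("llc", ProgramName, &Main)
    //        -> e.g. /usr/local/llvm/bin/llc      (directory of the driver binary)
    //   2. not executable there -> sys::Program::FindProgramByName("llc")
    //        -> first match on $PATH
    //   3. still nothing executable -> PrintError("Can't find program 'llc'")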
diff --git a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
index d0c0e15..33c6566 100644
--- a/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
+++ b/contrib/llvm/lib/CompilerDriver/CompilationGraph.cpp
@@ -32,7 +32,8 @@ using namespace llvmc;
namespace llvmc {
const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
- StringRef suf = File.getSuffix();
+ // Remove the '.'.
+ StringRef suf = sys::path::extension(File.str()).substr(1);
LanguageMap::const_iterator Lang =
this->find(suf.empty() ? "*empty*" : suf);
if (Lang == this->end()) {
@@ -218,10 +219,11 @@ FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
// Determine the input language.
- const std::string* InLang = LangMap.GetLanguage(In);
+ const std::string* InLang = (ForceLanguage ? ForceLanguage
+ : LangMap.GetLanguage(In));
if (InLang == 0)
return 0;
- const std::string& InLanguage = (ForceLanguage ? *ForceLanguage : *InLang);
+ const std::string& InLanguage = *InLang;
// Add the current input language to the input language set.
InLangs.insert(InLanguage);
@@ -439,13 +441,17 @@ int CompilationGraph::CheckLanguageNames() const {
continue;
}
- const char* OutLang = N1.ToolPtr->OutputLanguage();
+ const char** OutLangs = N1.ToolPtr->OutputLanguages();
const char** InLangs = N2->ToolPtr->InputLanguages();
bool eq = false;
- for (;*InLangs; ++InLangs) {
- if (std::strcmp(OutLang, *InLangs) == 0) {
- eq = true;
- break;
+ const char* OutLang = 0;
+ for (;*OutLangs; ++OutLangs) {
+ OutLang = *OutLangs;
+ for (;*InLangs; ++InLangs) {
+ if (std::strcmp(OutLang, *InLangs) == 0) {
+ eq = true;
+ break;
+ }
}
}
@@ -480,7 +486,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
for (const_nodes_iterator B = this->NodesMap.begin(),
E = this->NodesMap.end(); B != E; ++B) {
const Node& N = B->second;
- int MaxWeight = 0;
+ int MaxWeight = -1024;
// Ignore the root node.
if (!N.ToolPtr)
@@ -572,6 +578,26 @@ int CompilationGraph::Check () {
// Code related to graph visualization.
+namespace {
+
+std::string SquashStrArray (const char** StrArr) {
+ std::string ret;
+
+ for (; *StrArr; ++StrArr) {
+ if (*(StrArr + 1)) {
+ ret += *StrArr;
+ ret += ", ";
+ }
+ else {
+ ret += *StrArr;
+ }
+ }
+
+ return ret;
+}
+
+} // End anonymous namespace.
+
namespace llvm {
template <>
struct DOTGraphTraits<llvmc::CompilationGraph*>
@@ -586,7 +612,8 @@ namespace llvm {
if (N->ToolPtr->IsJoin())
return N->Name() + "\n (join" +
(N->HasChildren() ? ")"
- : std::string(": ") + N->ToolPtr->OutputLanguage() + ')');
+ : std::string(": ") +
+ SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
else
return N->Name();
else
@@ -596,28 +623,15 @@ namespace llvm {
template<typename EdgeIter>
static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
if (N->ToolPtr) {
- return N->ToolPtr->OutputLanguage();
+ return SquashStrArray(N->ToolPtr->OutputLanguages());
}
else {
- const char** InLangs = I->ToolPtr->InputLanguages();
- std::string ret;
-
- for (; *InLangs; ++InLangs) {
- if (*(InLangs + 1)) {
- ret += *InLangs;
- ret += ", ";
- }
- else {
- ret += *InLangs;
- }
- }
-
- return ret;
+ return SquashStrArray(I->ToolPtr->InputLanguages());
}
}
};
-}
+} // End namespace llvm
int CompilationGraph::writeGraph(const std::string& OutputFilename) {
std::string ErrorInfo;
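
SquashStrArray, introduced above, joins a NULL-terminated array of C strings with ", " for the DOT graph labels. A small usage sketch (the language names are made up for illustration):

    static const char *Langs[] = { "llvm-assembler", "llvm-bitcode", 0 };
    std::string Label = SquashStrArray(Langs);  // yields "llvm-assembler, llvm-bitcode"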
diff --git a/contrib/llvm/lib/CompilerDriver/Main.cpp b/contrib/llvm/lib/CompilerDriver/Main.cpp
index 0a6613a..7120027 100644
--- a/contrib/llvm/lib/CompilerDriver/Main.cpp
+++ b/contrib/llvm/lib/CompilerDriver/Main.cpp
@@ -16,8 +16,9 @@
#include "llvm/CompilerDriver/CompilationGraph.h"
#include "llvm/CompilerDriver/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
#include <sstream>
#include <string>
@@ -43,15 +44,15 @@ namespace {
return 0;
}
else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
- tempDir = OutputFilename;
- tempDir = tempDir.getDirname();
+ tempDir = sys::path::parent_path(OutputFilename);
}
else {
// SaveTemps == Cwd --> use current dir (leave tempDir empty).
return 0;
}
- if (!tempDir.exists()) {
+ bool Exists;
+ if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
std::string ErrMsg;
if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
PrintError(ErrMsg);
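
In the Main.cpp hunk above, the new llvm::sys::fs::exists writes its answer into an out-parameter and returns an error_code that converts to true on failure, so the guard reads "could not stat the path, or it is absent". Roughly:

    bool Exists;
    // A failed stat and a genuinely missing directory both fall through to creation.
    if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
      // createDirectoryOnDisk(...) as in the code above.
    }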
diff --git a/contrib/llvm/lib/CompilerDriver/Tool.cpp b/contrib/llvm/lib/CompilerDriver/Tool.cpp
index c8488b2..876759a 100644
--- a/contrib/llvm/lib/CompilerDriver/Tool.cpp
+++ b/contrib/llvm/lib/CompilerDriver/Tool.cpp
@@ -15,7 +15,7 @@
#include "llvm/CompilerDriver/Tool.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
#include <algorithm>
@@ -61,7 +61,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
Out.appendSuffix(OutputSuffix);
}
else {
- Out.set(In.getBasename());
+ Out.set(sys::path::stem(In.str()));
Out.appendSuffix(OutputSuffix);
}
}
@@ -69,7 +69,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
if (IsJoin())
Out = MakeTempFile(TempDir, "tmp", OutputSuffix);
else
- Out = MakeTempFile(TempDir, In.getBasename(), OutputSuffix);
+ Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix);
}
return Out;
}
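
Taken together, the CompilerDriver hunks replace sys::Path member calls with the StringRef-based sys::path free functions. The substitutions made in this commit, side by side:

    // File.getSuffix()       ->  sys::path::extension(File.str())  // keeps the leading '.', hence the .substr(1)
    // In.getBasename()       ->  sys::path::stem(In.str())
    // tempDir.getDirname()   ->  sys::path::parent_path(OutputFilename)
    // (relative-name test)   ->  sys::path::is_relative(prog.str())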
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index be7f1f5..f286975 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -19,14 +19,15 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetData.h"
#include <cmath>
#include <cstring>
@@ -45,14 +46,24 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs) = 0;
+ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
+ Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ CodeModel::Model CMM,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
std::string *ErrorStr) = 0;
-ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
-
ExecutionEngine::ExecutionEngine(Module *M)
: EEState(*this),
- LazyFunctionCreator(0) {
+ LazyFunctionCreator(0),
+ ExceptionTableRegister(0),
+ ExceptionTableDeregister(0) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
@@ -66,16 +77,25 @@ ExecutionEngine::~ExecutionEngine() {
delete Modules[i];
}
+void ExecutionEngine::DeregisterAllTables() {
+ if (ExceptionTableDeregister) {
+ for (std::vector<void*>::iterator it = AllExceptionTables.begin(),
+ ie = AllExceptionTables.end(); it != ie; ++it)
+ ExceptionTableDeregister(*it);
+ AllExceptionTables.clear();
+ }
+}
+
namespace {
-// This class automatically deletes the memory block when the GlobalVariable is
-// destroyed.
+/// \brief Helper class which uses a value handle to automatically delete the
+/// memory block when the GlobalVariable is destroyed.
class GVMemoryBlock : public CallbackVH {
GVMemoryBlock(const GlobalVariable *GV)
: CallbackVH(const_cast<GlobalVariable*>(GV)) {}
public:
- // Returns the address the GlobalVariable should be written into. The
- // GVMemoryBlock object prefixes that.
+ /// \brief Returns the address the GlobalVariable should be written into. The
+ /// GVMemoryBlock object prefixes that.
static char *Create(const GlobalVariable *GV, const TargetData& TD) {
const Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
@@ -97,13 +117,12 @@ public:
};
} // anonymous namespace
-char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) {
+char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
return GVMemoryBlock::Create(GV, *getTargetData());
}
-/// removeModule - Remove a Module from the list of modules.
bool ExecutionEngine::removeModule(Module *M) {
- for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
+ for(SmallVector<Module *, 1>::iterator I = Modules.begin(),
E = Modules.end(); I != E; ++I) {
Module *Found = *I;
if (Found == M) {
@@ -115,9 +134,6 @@ bool ExecutionEngine::removeModule(Module *M) {
return false;
}
-/// FindFunctionNamed - Search all of the active modules to find the one that
-/// defines FnName. This is very slow operation and shouldn't be used for
-/// general code.
Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
if (Function *F = Modules[i]->getFunction(FnName))
@@ -127,10 +143,13 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
}
-void *ExecutionEngineState::RemoveMapping(
- const MutexGuard &, const GlobalValue *ToUnmap) {
+void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
+ const GlobalValue *ToUnmap) {
GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
void *OldVal;
+
+ // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
+ // GlobalAddressMap.
if (I == GlobalAddressMap.end())
OldVal = 0;
else {
@@ -142,21 +161,16 @@ void *ExecutionEngineState::RemoveMapping(
return OldVal;
}
-/// addGlobalMapping - Tell the execution engine that the specified global is
-/// at the specified location. This is used internally as functions are JIT'd
-/// and as global variables are laid out in memory. It can and should also be
-/// used by clients of the EE that want to have an LLVM global overlay
-/// existing data in memory.
void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
- DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
+ DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
<< "\' to [" << Addr << "]\n";);
void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
CurVal = Addr;
-
- // If we are using the reverse mapping, add it too
+
+ // If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
AssertingVH<const GlobalValue> &V =
EEState.getGlobalAddressReverseMap(locked)[Addr];
@@ -165,32 +179,23 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
}
}
-/// clearAllGlobalMappings - Clear all global mappings and start over again
-/// use in dynamic compilation scenarios when you want to move globals
void ExecutionEngine::clearAllGlobalMappings() {
MutexGuard locked(lock);
-
+
EEState.getGlobalAddressMap(locked).clear();
EEState.getGlobalAddressReverseMap(locked).clear();
}
-/// clearGlobalMappingsFromModule - Clear all global mappings that came from a
-/// particular module, because it has been removed from the JIT.
void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
-
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+
+ for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
EEState.RemoveMapping(locked, FI);
- }
- for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
- GI != GE; ++GI) {
+ for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
+ GI != GE; ++GI)
EEState.RemoveMapping(locked, GI);
- }
}
-/// updateGlobalMapping - Replace an existing mapping for GV with a new
-/// address. This updates both maps as required. If "Addr" is null, the
-/// entry for the global is removed from the mappings.
void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
@@ -198,18 +203,17 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
EEState.getGlobalAddressMap(locked);
// Deleting from the mapping?
- if (Addr == 0) {
+ if (Addr == 0)
return EEState.RemoveMapping(locked, GV);
- }
-
+
void *&CurVal = Map[GV];
void *OldVal = CurVal;
if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
CurVal = Addr;
-
- // If we are using the reverse mapping, add it too
+
+ // If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
AssertingVH<const GlobalValue> &V =
EEState.getGlobalAddressReverseMap(locked)[Addr];
@@ -219,20 +223,14 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
return OldVal;
}
-/// getPointerToGlobalIfAvailable - This returns the address of the specified
-/// global value if it is has already been codegen'd, otherwise it returns null.
-///
void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
MutexGuard locked(lock);
-
+
ExecutionEngineState::GlobalAddressMapTy::iterator I =
EEState.getGlobalAddressMap(locked).find(GV);
return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
}
-/// getGlobalValueAtAddress - Return the LLVM global value object that starts
-/// at the specified address.
-///
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
MutexGuard locked(lock);
@@ -241,8 +239,8 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
for (ExecutionEngineState::GlobalAddressMapTy::iterator
I = EEState.getGlobalAddressMap(locked).begin(),
E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
- EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second,
- I->first));
+ EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+ I->second, I->first));
}
std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
@@ -301,54 +299,50 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
return Array;
}
-
-/// runStaticConstructorsDestructors - This method is used to execute all of
-/// the static constructors or destructors for a module, depending on the
-/// value of isDtors.
void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
bool isDtors) {
const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
-
- // Execute global ctors/dtors for each module in the program.
-
- GlobalVariable *GV = module->getNamedGlobal(Name);
-
- // If this global has internal linkage, or if it has a use, then it must be
- // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
- // this is the case, don't execute any of the global ctors, __main will do
- // it.
- if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
-
- // Should be an array of '{ int, void ()* }' structs. The first value is
- // the init priority, which we ignore.
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return;
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (ConstantStruct *CS =
- dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
- if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
- Constant *FP = CS->getOperand(1);
- if (FP->isNullValue())
- break; // Found a null terminator, exit.
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
- if (CE->isCast())
- FP = CE->getOperand(0);
- if (Function *F = dyn_cast<Function>(FP)) {
- // Execute the ctor/dtor function!
- runFunction(F, std::vector<GenericValue>());
- }
- }
+ GlobalVariable *GV = module->getNamedGlobal(Name);
+
+ // If this global has internal linkage, or if it has a use, then it must be
+ // an old-style (llvmgcc3) static ctor with __main linked in and in use. If
+ // this is the case, don't execute any of the global ctors, __main will do
+ // it.
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return;
+
+ // Should be an array of '{ int, void ()* }' structs. The first value is
+ // the init priority, which we ignore.
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ ConstantStruct *CS =
+ dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (!CS) continue;
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ Constant *FP = CS->getOperand(1);
+ if (FP->isNullValue())
+ break; // Found a null terminator, exit.
+
+ // Strip off constant expression casts.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+
+ // Execute the ctor/dtor function!
+ if (Function *F = dyn_cast<Function>(FP))
+ runFunction(F, std::vector<GenericValue>());
+
+ // FIXME: It is marginally lame that we just do nothing here if we see an
+ // entry we don't recognize. It might not be unreasonable for the verifier
+ // to not even allow this and just assert here.
+ }
}
-/// runStaticConstructorsDestructors - This method is used to execute all of
-/// the static constructors or destructors for a program, depending on the
-/// value of isDtors.
void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
// Execute global ctors/dtors for each module in the program.
- for (unsigned m = 0, e = Modules.size(); m != e; ++m)
- runStaticConstructorsDestructors(Modules[m], isDtors);
+ for (unsigned i = 0, e = Modules.size(); i != e; ++i)
+ runStaticConstructorsDestructors(Modules[i], isDtors);
}
#ifndef NDEBUG
@@ -362,9 +356,6 @@ static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
}
#endif
-/// runFunctionAsMain - This is a helper function which wraps runFunction to
-/// handle the common task of starting up main with the specified argc, argv,
-/// and envp parameters.
int ExecutionEngine::runFunctionAsMain(Function *Fn,
const std::vector<std::string> &argv,
const char * const * envp) {
@@ -376,32 +367,20 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
unsigned NumArgs = Fn->getFunctionType()->getNumParams();
const FunctionType *FTy = Fn->getFunctionType();
const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo();
- switch (NumArgs) {
- case 3:
- if (FTy->getParamType(2) != PPInt8Ty) {
- report_fatal_error("Invalid type for third argument of main() supplied");
- }
- // FALLS THROUGH
- case 2:
- if (FTy->getParamType(1) != PPInt8Ty) {
- report_fatal_error("Invalid type for second argument of main() supplied");
- }
- // FALLS THROUGH
- case 1:
- if (!FTy->getParamType(0)->isIntegerTy(32)) {
- report_fatal_error("Invalid type for first argument of main() supplied");
- }
- // FALLS THROUGH
- case 0:
- if (!FTy->getReturnType()->isIntegerTy() &&
- !FTy->getReturnType()->isVoidTy()) {
- report_fatal_error("Invalid return type of main() supplied");
- }
- break;
- default:
- report_fatal_error("Invalid number of arguments of main() supplied");
- }
-
+
+ // Check the argument types.
+ if (NumArgs > 3)
+ report_fatal_error("Invalid number of arguments of main() supplied");
+ if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty)
+ report_fatal_error("Invalid type for third argument of main() supplied");
+ if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty)
+ report_fatal_error("Invalid type for second argument of main() supplied");
+ if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32))
+ report_fatal_error("Invalid type for first argument of main() supplied");
+ if (!FTy->getReturnType()->isIntegerTy() &&
+ !FTy->getReturnType()->isVoidTy())
+ report_fatal_error("Invalid return type of main() supplied");
+
ArgvArray CArgv;
ArgvArray CEnv;
if (NumArgs) {
@@ -420,13 +399,10 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
}
}
}
+
return runFunction(Fn, GVArgs).IntVal.getZExtValue();
}
-/// If possible, create a JIT, unless the caller specifically requests an
-/// Interpreter or there's an error. If even an Interpreter cannot be created,
-/// NULL is returned.
-///
ExecutionEngine *ExecutionEngine::create(Module *M,
bool ForceInterpreter,
std::string *ErrorStr,
@@ -464,7 +440,13 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
if (WhichEngine & EngineKind::JIT) {
- if (ExecutionEngine::JITCtor) {
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+ AllocateGVsWithCode, CMModel,
+ MArch, MCPU, MAttrs);
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
ExecutionEngine *EE =
ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
AllocateGVsWithCode, CMModel,
@@ -486,21 +468,18 @@ ExecutionEngine *EngineBuilder::create() {
if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) {
if (ErrorStr)
*ErrorStr = "JIT has not been linked in.";
- }
+ }
+
return 0;
}
-/// getPointerToGlobal - This returns the address of the specified global
-/// value. This may involve code generation if it's a function.
-///
void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
if (Function *F = const_cast<Function*>(dyn_cast<Function>(GV)))
return getPointerToFunction(F);
MutexGuard locked(lock);
- void *p = EEState.getGlobalAddressMap(locked)[GV];
- if (p)
- return p;
+ if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+ return P;
// Global variable might have been added since interpreter started.
if (GlobalVariable *GVar =
@@ -508,12 +487,12 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
EmitGlobalVariable(GVar);
else
llvm_unreachable("Global hasn't had an address allocated yet!");
+
return EEState.getGlobalAddressMap(locked)[GV];
}
-/// This function converts a Constant* into a GenericValue. The interesting
-/// part is if C is a ConstantExpr.
-/// @brief Get a GenericValue for a Constant*
+/// \brief Converts a Constant* into a GenericValue, including handling of
+/// ConstantExpr values.
GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// If its undefined, return the garbage.
if (isa<UndefValue>(C)) {
@@ -533,12 +512,12 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
return Result;
}
- // If the value is a ConstantExpr
+ // Otherwise, if the value is a ConstantExpr...
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
Constant *Op0 = CE->getOperand(0);
switch (CE->getOpcode()) {
case Instruction::GetElementPtr: {
- // Compute the index
+ // Compute the index
GenericValue Result = getConstantValue(Op0);
SmallVector<Value*, 8> Indices(CE->op_begin()+1, CE->op_end());
uint64_t Offset =
@@ -585,9 +564,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.roundToDouble();
else if (CE->getType()->isX86_FP80Ty()) {
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(80, 2, zero));
- (void)apf.convertFromAPInt(GV.IntVal,
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
false,
APFloat::rmNearestTiesToEven);
GV.IntVal = apf.bitcastToAPInt();
@@ -601,9 +579,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.signedRoundToDouble();
else if (CE->getType()->isX86_FP80Ty()) {
- const uint64_t zero[] = { 0, 0};
- APFloat apf = APFloat(APInt(80, 2, zero));
- (void)apf.convertFromAPInt(GV.IntVal,
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ (void)apf.convertFromAPInt(GV.IntVal,
true,
APFloat::rmNearestTiesToEven);
GV.IntVal = apf.bitcastToAPInt();
@@ -623,7 +600,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
uint64_t v;
bool ignored;
(void)apf.convertToInteger(&v, BitWidth,
- CE->getOpcode()==Instruction::FPToSI,
+ CE->getOpcode()==Instruction::FPToSI,
APFloat::rmTowardZero, &ignored);
GV.IntVal = v; // endian?
}
@@ -656,13 +633,13 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (DestTy->isDoubleTy())
GV.DoubleVal = GV.IntVal.bitsToDouble();
break;
- case Type::FloatTyID:
+ case Type::FloatTyID:
assert(DestTy->isIntegerTy(32) && "Invalid bitcast");
- GV.IntVal.floatToBits(GV.FloatVal);
+ GV.IntVal = APInt::floatToBits(GV.FloatVal);
break;
case Type::DoubleTyID:
assert(DestTy->isIntegerTy(64) && "Invalid bitcast");
- GV.IntVal.doubleToBits(GV.DoubleVal);
+ GV.IntVal = APInt::doubleToBits(GV.DoubleVal);
break;
case Type::PointerTyID:
assert(DestTy->isPointerTy() && "Invalid bitcast");
@@ -712,9 +689,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break;
case Instruction::FMul:
GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break;
- case Instruction::FDiv:
+ case Instruction::FDiv:
GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
- case Instruction::FRem:
+ case Instruction::FRem:
GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
}
break;
@@ -727,9 +704,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break;
case Instruction::FMul:
GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break;
- case Instruction::FDiv:
+ case Instruction::FDiv:
GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
- case Instruction::FRem:
+ case Instruction::FRem:
GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
}
break;
@@ -738,7 +715,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Type::FP128TyID: {
APFloat apfLHS = APFloat(LHS.IntVal);
switch (CE->getOpcode()) {
- default: llvm_unreachable("Invalid long double opcode");llvm_unreachable(0);
+ default: llvm_unreachable("Invalid long double opcode");
case Instruction::FAdd:
apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
@@ -751,11 +728,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
- case Instruction::FDiv:
+ case Instruction::FDiv:
apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
- case Instruction::FRem:
+ case Instruction::FRem:
apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven);
GV.IntVal = apfLHS.bitcastToAPInt();
break;
@@ -768,16 +745,18 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
default:
break;
}
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "ConstantExpr not handled: " << *CE;
- report_fatal_error(Msg.str());
+
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ConstantExpr not handled: " << *CE;
+ report_fatal_error(OS.str());
}
+ // Otherwise, we have a simple constant.
GenericValue Result;
switch (C->getType()->getTypeID()) {
- case Type::FloatTyID:
- Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
+ case Type::FloatTyID:
+ Result.FloatVal = cast<ConstantFP>(C)->getValueAPF().convertToFloat();
break;
case Type::DoubleTyID:
Result.DoubleVal = cast<ConstantFP>(C)->getValueAPF().convertToDouble();
@@ -804,11 +783,12 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
llvm_unreachable("Unknown constant pointer type!");
break;
default:
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "ERROR: Constant unimplemented for type: " << *C->getType();
- report_fatal_error(Msg.str());
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ERROR: Constant unimplemented for type: " << *C->getType();
+ report_fatal_error(OS.str());
}
+
return Result;
}
@@ -819,11 +799,11 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
uint8_t *Src = (uint8_t *)IntVal.getRawData();
- if (sys::isLittleEndianHost())
+ if (sys::isLittleEndianHost()) {
// Little-endian host - the source is ordered from LSB to MSB. Order the
// destination from LSB to MSB: Do a straight copy.
memcpy(Dst, Src, StoreBytes);
- else {
+ } else {
// Big-endian host - the source is an array of 64 bit words ordered from
// LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
// from MSB to LSB: Reverse the word order, but not the bytes in a word.
@@ -838,10 +818,6 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
}
}
-/// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr. Ptr
-/// is the address of the memory at which to store Val, cast to GenericValue *.
-/// It is not a pointer to a GenericValue containing the address at which to
-/// store Val.
void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
GenericValue *Ptr, const Type *Ty) {
const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty);
@@ -932,16 +908,13 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
break;
}
default:
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Cannot load value of type " << *Ty << "!";
- report_fatal_error(Msg.str());
+ SmallString<256> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "Cannot load value of type " << *Ty << "!";
+ report_fatal_error(OS.str());
}
}
-// InitializeMemory - Recursive function to apply a Constant value into the
-// specified memory location...
-//
void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
DEBUG(Init->dump());
@@ -974,20 +947,17 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
return;
}
- dbgs() << "Bad Type: " << *Init->getType() << "\n";
+ DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n");
llvm_unreachable("Unknown constant type to initialize memory with!");
}
/// EmitGlobals - Emit all of the global variables to memory, storing their
/// addresses into GlobalAddress. This must make sure to copy the contents of
/// their initializers into the memory.
-///
void ExecutionEngine::emitGlobals() {
-
// Loop over all of the global variables in the program, allocating the memory
// to hold them. If there is more than one module, do a prepass over globals
// to figure out how the different modules should link together.
- //
std::map<std::pair<std::string, const Type*>,
const GlobalValue*> LinkedGlobalsMap;
@@ -1000,8 +970,8 @@ void ExecutionEngine::emitGlobals() {
if (GV->hasLocalLinkage() || GV->isDeclaration() ||
GV->hasAppendingLinkage() || !GV->hasName())
continue;// Ignore external globals and globals with internal linkage.
-
- const GlobalValue *&GVEntry =
+
+ const GlobalValue *&GVEntry =
LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
// If this is the first time we've seen this global, it is the canonical
@@ -1010,13 +980,13 @@ void ExecutionEngine::emitGlobals() {
GVEntry = GV;
continue;
}
-
+
// If the existing global is strong, never replace it.
if (GVEntry->hasExternalLinkage() ||
GVEntry->hasDLLImportLinkage() ||
GVEntry->hasDLLExportLinkage())
continue;
-
+
// Otherwise, we know it's linkonce/weak, replace it if this is a strong
// symbol. FIXME is this right for common?
if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
@@ -1024,7 +994,7 @@ void ExecutionEngine::emitGlobals() {
}
}
}
-
+
std::vector<const GlobalValue*> NonCanonicalGlobals;
for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
Module &M = *Modules[m];
@@ -1032,7 +1002,7 @@ void ExecutionEngine::emitGlobals() {
I != E; ++I) {
// In the multi-module case, see what this global maps to.
if (!LinkedGlobalsMap.empty()) {
- if (const GlobalValue *GVEntry =
+ if (const GlobalValue *GVEntry =
LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
// If something else is the canonical global, ignore this one.
if (GVEntry != &*I) {
@@ -1041,7 +1011,7 @@ void ExecutionEngine::emitGlobals() {
}
}
}
-
+
if (!I->isDeclaration()) {
addGlobalMapping(I, getMemoryForGV(I));
} else {
@@ -1056,7 +1026,7 @@ void ExecutionEngine::emitGlobals() {
}
}
}
-
+
// If there are multiple modules, map the non-canonical globals to their
// canonical location.
if (!NonCanonicalGlobals.empty()) {
@@ -1069,14 +1039,14 @@ void ExecutionEngine::emitGlobals() {
addGlobalMapping(GV, Ptr);
}
}
-
- // Now that all of the globals are set up in memory, loop through them all
+
+ // Now that all of the globals are set up in memory, loop through them all
// and initialize their contents.
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (!I->isDeclaration()) {
if (!LinkedGlobalsMap.empty()) {
- if (const GlobalValue *GVEntry =
+ if (const GlobalValue *GVEntry =
LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
if (GVEntry != &*I) // Not the canonical variable.
continue;
@@ -1098,11 +1068,11 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
GA = getMemoryForGV(GV);
addGlobalMapping(GV, GA);
}
-
+
// Don't initialize if it's thread local, let the client do it.
if (!GV->isThreadLocal())
InitializeMemory(GV->getInitializer(), GA);
-
+
const Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
NumInitBytes += (unsigned)GVSize;
@@ -1113,18 +1083,20 @@ ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE)
: EE(EE), GlobalAddressMap(this) {
}
-sys::Mutex *ExecutionEngineState::AddressMapConfig::getMutex(
- ExecutionEngineState *EES) {
+sys::Mutex *
+ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) {
return &EES->EE.lock;
}
-void ExecutionEngineState::AddressMapConfig::onDelete(
- ExecutionEngineState *EES, const GlobalValue *Old) {
+
+void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
+ const GlobalValue *Old) {
void *OldVal = EES->GlobalAddressMap.lookup(Old);
EES->GlobalAddressReverseMap.erase(OldVal);
}
-void ExecutionEngineState::AddressMapConfig::onRAUW(
- ExecutionEngineState *, const GlobalValue *, const GlobalValue *) {
+void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
+ const GlobalValue *,
+ const GlobalValue *) {
assert(false && "The ExecutionEngine doesn't know how to handle a"
" RAUW on a value it has a global mapping for.");
}
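
The runFunctionAsMain rewrite above drops the fall-through switch but accepts the same prototypes. As a summary (not part of the patch), the C-level signatures that pass the new checks are:

    int main(void);
    int main(int argc);                            // i32
    int main(int argc, char **argv);               // i32, i8**
    int main(int argc, char **argv, char **envp);  // i32, i8**, i8**
    // A void return type is also tolerated; any other arity or parameter type
    // reaches report_fatal_error.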
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 59ebe6e..498063b 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1060,11 +1060,9 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
Dest.PointerVal = Src.PointerVal;
} else if (DstTy->isIntegerTy()) {
if (SrcTy->isFloatTy()) {
- Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT);
- Dest.IntVal.floatToBits(Src.FloatVal);
+ Dest.IntVal = APInt::floatToBits(Src.FloatVal);
} else if (SrcTy->isDoubleTy()) {
- Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT);
- Dest.IntVal.doubleToBits(Src.DoubleVal);
+ Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
} else if (SrcTy->isIntegerTy()) {
Dest.IntVal = Src.IntVal;
} else
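
This hunk, like the getConstantValue changes earlier, replaces the old mutating IntVal.floatToBits/doubleToBits calls with static APInt helpers that return a value of the right width. In isolation:

    float  F = 1.0f;
    double D = 2.0;
    APInt FBits = APInt::floatToBits(F);   // 32-bit APInt holding F's IEEE-754 pattern
    APInt DBits = APInt::doubleToBits(D);  // 64-bit APInt holding D's pattern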
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 57d1260..062256a 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -24,10 +24,10 @@
#include "llvm/Module.h"
#include "llvm/Config/config.h" // Detect libffi
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <csignal>
#include <cstdio>
#include <map>
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index 564e9ab..bfebe3d 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -19,7 +19,7 @@
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
index 274f816..169e1ba 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -17,7 +17,7 @@
#include "JIT.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Config/config.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
index 63125b7..cc76b13 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp
@@ -30,7 +30,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MutexGuard.h"
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Config/config.h"
using namespace llvm;
@@ -66,8 +66,15 @@ static struct RegisterJIT {
extern "C" void LLVMLinkInJIT() {
}
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
-#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+#if HAVE_EHTABLE_SUPPORT
// libgcc defines the __register_frame function to dynamically register new
// dwarf frames for exception handling. This functionality is not portable
@@ -87,6 +94,7 @@ extern "C" void LLVMLinkInJIT() {
// values of an opaque key, used by libgcc to find dwarf tables.
extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
# define USE_KEYMGR 1
@@ -190,7 +198,7 @@ void DarwinRegisterFrame(void* FrameBegin) {
}
#endif // __APPLE__
-#endif // __GNUC__
+#endif // HAVE_EHTABLE_SUPPORT
/// createJIT - This is the factory method for creating a JIT for the current
/// machine, it does not fall back to the interpreter. This takes ownership
@@ -306,7 +314,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
}
// Register routine for informing unwinding runtime about new EH frames
-#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+#if HAVE_EHTABLE_SUPPORT
#if USE_KEYMGR
struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
_keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
@@ -318,16 +326,21 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
_keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
InstallExceptionTableRegister(DarwinRegisterFrame);
+ // Not sure about how to deregister on Darwin.
#else
InstallExceptionTableRegister(__register_frame);
+ InstallExceptionTableDeregister(__deregister_frame);
#endif // __APPLE__
-#endif // __GNUC__
+#endif // HAVE_EHTABLE_SUPPORT
// Initialize passes.
PM.doInitialization();
}
JIT::~JIT() {
+ // Unregister all exception tables registered by this JIT.
+ DeregisterAllTables();
+ // Cleanup.
AllJits->Remove(this);
delete jitstate;
delete JCE;
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 6e11a3c..3b5acb7 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -25,7 +25,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <string>
#include <vector>
@@ -35,7 +35,7 @@ namespace llvm {
extern "C" {
// Debuggers puts a breakpoint in this function.
- DISABLE_INLINE void __jit_debug_register_code() { }
+ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
index 7e53d78..dce506b 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -16,7 +16,7 @@
#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include <string>
// This must be kept in sync with gdb/gdb/jit.h .
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 1105bcc..f54ccca 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -26,7 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -43,8 +43,9 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
const TargetMachine& TM = F.getTarget();
TD = TM.getTargetData();
- stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
+ stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
RI = TM.getRegisterInfo();
+ TFI = TM.getFrameLowering();
JCE = &jce;
unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
@@ -66,7 +67,7 @@ void
JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
const std::vector<MachineMove> &Moves) const {
unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
MCSymbol *BaseLabel = 0;
@@ -481,7 +482,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
unsigned char*
JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
+ int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
PointerSize : -PointerSize;
unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
@@ -523,7 +524,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
}
std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
+ TFI->getInitialFrameState(Moves);
EmitFrameMoves(0, Moves);
JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
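
JITDwarfEmitter now reads stack-frame properties from the new TargetFrameLowering interface rather than TargetFrameInfo/TargetRegisterInfo. The renames used above, for quick reference:

    // TM.getFrameInfo()                 ->  TM.getFrameLowering()
    // TargetFrameInfo::StackGrowsUp     ->  TargetFrameLowering::StackGrowsUp
    // RI->getInitialFrameState(Moves)   ->  TFI->getInitialFrameState(Moves)
    // (TFI is the const TargetFrameLowering* member added to the class below.)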
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 3095682..9495697 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -23,6 +23,7 @@ class MachineFunction;
class MachineModuleInfo;
class MachineMove;
class TargetData;
+class TargetFrameLowering;
class TargetMachine;
class TargetRegisterInfo;
@@ -30,6 +31,7 @@ class JITDwarfEmitter {
const TargetData* TD;
JITCodeEmitter* JCE;
const TargetRegisterInfo* RI;
+ const TargetFrameLowering *TFI;
MachineModuleInfo* MMI;
JIT& Jit;
bool stackGrowthDirection;
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 4c0d078..4cd8757 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -42,8 +42,8 @@
#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Disassembler.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 653e6f1..eec23ce 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
#include <vector>
#include <cassert>
#include <climits>
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index 1ca084b..670fa7d 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Errno.h"
+#include "llvm/Support/Errno.h"
#include "llvm/Config/config.h"
#include <stddef.h>
using namespace llvm;
diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/TargetSelect.cpp
index 3349c33..6b7173c 100644
--- a/contrib/llvm/lib/ExecutionEngine/JIT/TargetSelect.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -18,7 +18,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
new file mode 100644
index 0000000..f7ed176
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMCJIT
+ MCJIT.cpp
+ TargetSelect.cpp
+ )
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 0000000..f1e9dab
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,92 @@
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+ RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ CodeModel::Model CMM,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs) {
+ // Try to register the program as a source of symbols to resolve against.
+ //
+ // FIXME: Don't do this here.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+ // Pick a target either via -march or by guessing the native arch.
+ //
+ // FIXME: This should be lifted out of here, it isn't something which should
+ // be part of the JIT policy, rather the burden for this selection should be
+ // pushed to clients.
+ TargetMachine *TM = MCJIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ TM->setCodeModel(CMM);
+
+ // If the target supports JIT code generation, create the JIT.
+ if (TargetJITInfo *TJ = TM->getJITInfo())
+ return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+
+ if (ErrorStr)
+ *ErrorStr = "target does not support JIT code generation";
+ return 0;
+}
+
+MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode)
+ : ExecutionEngine(M) {
+}
+
+MCJIT::~MCJIT() {
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+ report_fatal_error("not yet implemented");
+ return 0;
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+ return 0;
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+ report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) {
+ report_fatal_error("not yet implemented");
+ return GenericValue();
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 0000000..cd1f989
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,68 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
+namespace llvm {
+
+class MCJIT : public ExecutionEngine {
+ MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+ bool AllocateGVsWithCode);
+public:
+ ~MCJIT();
+
+ /// @name ExecutionEngine interface implementation
+ /// @{
+
+ virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+ virtual void *getPointerToFunction(Function *F);
+
+ virtual void *recompileAndRelinkFunction(Function *F);
+
+ virtual void freeMachineCodeForFunction(Function *F);
+
+ virtual GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues);
+
+ /// @}
+ /// @name (Private) Registration Interfaces
+ /// @{
+
+ static void Register() {
+ MCJITCtor = createJIT;
+ }
+
+ // FIXME: This routine is scheduled for termination. Do not use it.
+ static TargetMachine *selectTarget(Module *M,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs,
+ std::string *Err);
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel,
+ bool GVsWithCode,
+ CodeModel::Model CMM,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs);
+
+  /// @}
+};
+
+} // End llvm namespace
+
+#endif
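
The Register()/createJIT pair above is a registration hook: Register() points the engine-wide MCJITCtor function pointer at MCJIT's private factory, so generic ExecutionEngine code can construct an MCJIT without a hard dependency on this library. The standalone sketch below illustrates that pattern only; Engine, JitEngine and JITCtor are illustrative stand-ins, not the LLVM classes.

// Sketch of the factory-registration hook used by MCJIT::Register().
#include <iostream>
#include <string>

struct Engine {
  virtual ~Engine() {}
  virtual void run() = 0;

  // Global hook; stays null unless the JIT library is linked and registered.
  typedef Engine *(*CtorFn)(std::string *Err);
  static CtorFn JITCtor;

  static Engine *create(std::string *Err) {
    if (JITCtor)
      return JITCtor(Err);
    if (Err)
      *Err = "JIT has not been linked in";
    return 0;
  }
};
Engine::CtorFn Engine::JITCtor = 0;

class JitEngine : public Engine {
  JitEngine() {}                                  // constructible only via the factory
  static Engine *createJIT(std::string *Err) { return new JitEngine(); }
public:
  void run() { std::cout << "jit running\n"; }
  static void Register() { Engine::JITCtor = createJIT; }  // what MCJIT::Register() does
};

int main() {
  JitEngine::Register();
  std::string Err;
  if (Engine *E = Engine::create(&Err)) { E->run(); delete E; }
  else std::cerr << Err << "\n";
  return 0;
}
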
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
new file mode 100644
index 0000000..967efbc
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCJIT
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
new file mode 100644
index 0000000..50f6593
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
@@ -0,0 +1,91 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x. Clients
+// should initialize targets prior to calling createJIT.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch. Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *MCJIT::selectTarget(Module *Mod,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs,
+ std::string *ErrorStr) {
+ Triple TheTriple(Mod->getTargetTriple());
+ if (TheTriple.getTriple().empty())
+ TheTriple.setTriple(sys::getHostTriple());
+
+ // Adjust the triple to match what the user requested.
+ const Target *TheTarget = 0;
+ if (!MArch.empty()) {
+ for (TargetRegistry::iterator it = TargetRegistry::begin(),
+ ie = TargetRegistry::end(); it != ie; ++it) {
+ if (MArch == it->getName()) {
+ TheTarget = &*it;
+ break;
+ }
+ }
+
+ if (!TheTarget) {
+ *ErrorStr = "No available targets are compatible with this -march, "
+ "see -version for the available targets.\n";
+ return 0;
+ }
+
+ // Adjust the triple to match (if known), otherwise stick with the
+ // module/host triple.
+ Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+ if (Type != Triple::UnknownArch)
+ TheTriple.setArch(Type);
+ } else {
+ std::string Error;
+ TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+ if (TheTarget == 0) {
+ if (ErrorStr)
+ *ErrorStr = Error;
+ return 0;
+ }
+ }
+
+ if (!TheTarget->hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host you are"
+           << " running on. If bad things happen, please choose a different "
+ << "-march switch.\n";
+ }
+
+ // Package up features to be passed to target/subtarget
+ std::string FeaturesStr;
+ if (!MCPU.empty() || !MAttrs.empty()) {
+ SubtargetFeatures Features;
+ Features.setCPU(MCPU);
+ for (unsigned i = 0; i != MAttrs.size(); ++i)
+ Features.AddFeature(MAttrs[i]);
+ FeaturesStr = Features.getString();
+ }
+
+ // Allocate a target...
+ TargetMachine *Target =
+ TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
+ assert(Target && "Could not allocate target machine!");
+ return Target;
+}
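
selectTarget above implements a simple policy: if -march was given, find the registered target with exactly that name (adjusting the triple when the name maps to a known arch); otherwise look the target up from the module or host triple, warn when the chosen target lacks JIT support, and fold -mcpu/-mattr into one feature string. The sketch below mirrors that flow against a toy registry; TargetInfo, pick() and the feature-string encoding are assumptions made for the sketch, not LLVM API.

// Toy version of the -march / triple selection and feature packaging.
#include <iostream>
#include <string>
#include <vector>

struct TargetInfo { std::string Name; std::string DefaultTriple; bool HasJIT; };

static const TargetInfo *pick(const std::vector<TargetInfo> &Registry,
                              const std::string &MArch,
                              const std::string &Triple,
                              std::string &Err) {
  if (!MArch.empty()) {
    for (size_t i = 0; i != Registry.size(); ++i)
      if (Registry[i].Name == MArch)          // exact -march name match
        return &Registry[i];
    Err = "no target is compatible with this -march";
    return 0;
  }
  for (size_t i = 0; i != Registry.size(); ++i)
    if (Registry[i].DefaultTriple == Triple)  // fall back to module/host triple
      return &Registry[i];
  Err = "no target for triple " + Triple;
  return 0;
}

// Roughly analogous to SubtargetFeatures: join -mcpu and -mattr values into
// one comma-separated string (the exact encoding here is an assumption).
static std::string features(const std::string &CPU,
                            const std::vector<std::string> &Attrs) {
  std::string S = CPU;
  for (size_t i = 0; i != Attrs.size(); ++i)
    S += (S.empty() ? "" : ",") + Attrs[i];
  return S;
}

int main() {
  std::vector<TargetInfo> Reg;
  TargetInfo X86 = { "x86-64", "x86_64-unknown-linux-gnu", true };
  Reg.push_back(X86);
  std::string Err;
  const TargetInfo *T = pick(Reg, "x86-64", "", Err);
  if (!T) { std::cerr << Err << "\n"; return 1; }
  if (!T->HasJIT)
    std::cerr << "WARNING: target has no JIT support\n";
  std::cout << T->Name << " features: "
            << features("corei7", std::vector<std::string>(1, "sse4.2")) << "\n";
  return 0;
}
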
diff --git a/contrib/llvm/lib/Linker/LinkItems.cpp b/contrib/llvm/lib/Linker/LinkItems.cpp
index 1be2bec..52a0d17 100644
--- a/contrib/llvm/lib/Linker/LinkItems.cpp
+++ b/contrib/llvm/lib/Linker/LinkItems.cpp
@@ -15,9 +15,10 @@
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
// LinkItems - This function is the main entry point into linking. It takes a
@@ -160,19 +161,19 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
// Check for a file of name "-", which means "read standard input"
if (File.str() == "-") {
std::auto_ptr<Module> M;
- if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(&Error)) {
+ OwningPtr<MemoryBuffer> Buffer;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
if (!Buffer->getBufferSize()) {
- delete Buffer;
Error = "standard input is empty";
} else {
- M.reset(ParseBitcodeFile(Buffer, Context, &Error));
- delete Buffer;
+ M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
if (M.get())
if (!LinkInModule(M.get(), &Error))
return false;
}
}
- return error("Cannot link stdin: " + Error);
+ return error("Cannot link stdin: " + ec.message());
}
// Determine what variety of file it is.
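
The change above moves stdin loading from a raw MemoryBuffer pointer that the caller had to delete to the error_code style: the buffer comes back through an OwningPtr out-parameter and the return value only carries success or failure. Below is a minimal standalone sketch of that calling convention, using plain C++ stand-ins (ErrorCode, Buffer, getStdin) rather than the LLVM types.

// Out-parameter plus error-code loading, as adopted by LinkInFile above.
#include <iostream>
#include <memory>
#include <string>

struct ErrorCode {
  std::string Msg;
  explicit operator bool() const { return !Msg.empty(); }  // true == failure
  std::string message() const { return Msg; }
};

struct Buffer { std::string Data; size_t size() const { return Data.size(); } };

static ErrorCode getStdin(std::unique_ptr<Buffer> &Out) {
  std::string All, Line;
  while (std::getline(std::cin, Line)) All += Line + "\n";
  if (!std::cin.eof() && std::cin.fail()) { ErrorCode E; E.Msg = "read error"; return E; }
  Out.reset(new Buffer());
  Out->Data = All;
  return ErrorCode();                                      // success: empty message
}

int main() {
  std::unique_ptr<Buffer> Buf;
  if (ErrorCode ec = getStdin(Buf)) {                      // error path when the code is "true"
    std::cerr << "cannot read stdin: " << ec.message() << "\n";
    return 1;
  }
  if (Buf->size() == 0)
    std::cerr << "standard input is empty\n";
  else
    std::cout << "read " << Buf->size() << " bytes\n";
  return 0;
}
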
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index 7e8245a..5aa06ab 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -28,7 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -434,8 +434,10 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
}
// Check visibility
- if (Dest && Src->getVisibility() != Dest->getVisibility())
- if (!Src->isDeclaration() && !Dest->isDeclaration())
+ if (Dest && Src->getVisibility() != Dest->getVisibility() &&
+ !Src->isDeclaration() && !Dest->isDeclaration() &&
+ !Src->hasAvailableExternallyLinkage() &&
+ !Dest->hasAvailableExternallyLinkage())
return Error(Err, "Linking globals named '" + Src->getName() +
"': symbols have different visibilities!");
return false;
@@ -449,10 +451,9 @@ static void LinkNamedMDNodes(Module *Dest, Module *Src,
const NamedMDNode *SrcNMD = I;
NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
// Add Src elements into Dest node.
- for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
+ for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
- ValueMap,
- true)));
+ ValueMap)));
}
}
@@ -520,6 +521,8 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
continue;
}
+ bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr();
+
// If the visibilities of the symbols disagree and the destination is a
// prototype, take the visibility of its input.
if (DGV->isDeclaration())
@@ -559,14 +562,17 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
// we are replacing may be a function (if a prototype, weak, etc) or a
// global variable.
GlobalVariable *NewDGV =
- new GlobalVariable(*Dest, SGV->getType()->getElementType(),
- SGV->isConstant(), NewLinkage, /*init*/0,
+ new GlobalVariable(*Dest, SGV->getType()->getElementType(),
+ SGV->isConstant(), NewLinkage, /*init*/0,
DGV->getName(), 0, false,
SGV->getType()->getAddressSpace());
+ // Set the unnamed_addr.
+ NewDGV->setUnnamedAddr(HasUnnamedAddr);
+
// Propagate alignment, section, and visibility info.
CopyGVAttributes(NewDGV, SGV);
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
DGV->getType()));
// DGV will conflict with NewDGV because they both had the same
@@ -608,8 +614,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
"': symbol multiple defined");
}
- // Set calculated linkage
+ // Set calculated linkage and unnamed_addr
DGV->setLinkage(NewLinkage);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
// Make sure to remember this mapping...
ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType());
@@ -668,6 +675,13 @@ static bool LinkAlias(Module *Dest, const Module *Src,
GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
GlobalValue* DGV = NULL;
+ // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken
+  // by this, but aliases to GEPs are broken by a lot of other things, so
+ // it's less important.
+ Constant *DAliaseeConst = DAliasee;
+ if (SGA->getType() != DAliasee->getType())
+ DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType());
+
// Try to find something 'similar' to SGA in destination module.
if (!DGV && !SGA->hasLocalLinkage()) {
DGV = Dest->getNamedAlias(SGA->getName());
@@ -721,7 +735,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
"': aliasee is not global variable");
NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliasee, Dest);
+ SGA->getName(), DAliaseeConst, Dest);
CopyGVAttributes(NewGA, SGA);
// Any uses of DGV need to change to NewGA, with cast, if needed.
@@ -750,7 +764,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
"': aliasee is not function");
NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliasee, Dest);
+ SGA->getName(), DAliaseeConst, Dest);
CopyGVAttributes(NewGA, SGA);
// Any uses of DF need to change to NewGA, with cast, if needed.
@@ -772,14 +786,8 @@ static bool LinkAlias(Module *Dest, const Module *Src,
} else {
// No linking to be performed, simply create an identical version of the
// alias over in the dest module...
- Constant *Aliasee = DAliasee;
- // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken
- // by this, but aliases to GEPs are broken to a lot of other things, so
- // it's less important.
- if (SGA->getType() != DAliasee->getType())
- Aliasee = ConstantExpr::getBitCast(DAliasee, SGA->getType());
NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), Aliasee, Dest);
+ SGA->getName(), DAliaseeConst, Dest);
CopyGVAttributes(NewGA, SGA);
// Proceed to 'common' steps
@@ -813,9 +821,9 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
const GlobalVariable *SGV = I;
if (SGV->hasInitializer()) { // Only process initialized GV's
- // Figure out what the initializer looks like in the dest module...
+ // Figure out what the initializer looks like in the dest module.
Constant *SInit =
- cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));
+ cast<Constant>(MapValue(SGV->getInitializer(), ValueMap));
// Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
@@ -927,7 +935,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
CopyGVAttributes(NewDF, SF);
// Any uses of DF need to change to NewDF, with cast
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
DGV->getType()));
// DF will conflict with NewDF because they both had the same. We must
@@ -995,32 +1003,10 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
// At this point, all of the instructions and values of the function are now
// copied over. The only problem is that they are still referencing values in
// the Source function as operands. Loop through all of the operands of the
- // functions and patch them up to point to the local versions...
- //
- // This is the same as RemapInstruction, except that it avoids remapping
- // instruction and basic block operands.
- //
+ // functions and patch them up to point to the local versions.
for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- // Remap operands.
- for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
- *OI = MapValue(*OI, ValueMap, true);
-
- // Remap attached metadata.
- SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadata(MDs);
- for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
- MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
- Value *Old = MI->second;
- if (!isa<Instruction>(Old) && !isa<BasicBlock>(Old)) {
- Value *New = MapValue(Old, ValueMap, true);
- if (New != Old)
- I->setMetadata(MI->first, cast<MDNode>(New));
- }
- }
- }
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries);
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1099,7 +1085,7 @@ static bool LinkAppendingVars(Module *M,
"Appending variables with different section name need to be linked!");
unsigned NewSize = T1->getNumElements() + T2->getNumElements();
- ArrayType *NewType = ArrayType::get(T1->getElementType(),
+ ArrayType *NewType = ArrayType::get(T1->getElementType(),
NewSize);
G1->setName(""); // Clear G1's name in case of a conflict!
@@ -1143,7 +1129,7 @@ static bool LinkAppendingVars(Module *M,
// getelementptr instructions to not use the Cast!
G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
G1->getType()));
- G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
+ G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
G2->getType()));
// Remove the two globals from the module now...
@@ -1217,8 +1203,13 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
Src->getDataLayout() != Dest->getDataLayout())
errs() << "WARNING: Linking two modules of different data layouts!\n";
if (!Src->getTargetTriple().empty() &&
- Dest->getTargetTriple() != Src->getTargetTriple())
- errs() << "WARNING: Linking two modules of different target triples!\n";
+ Dest->getTargetTriple() != Src->getTargetTriple()) {
+ errs() << "WARNING: Linking two modules of different target triples: ";
+ if (!Src->getModuleIdentifier().empty())
+ errs() << Src->getModuleIdentifier() << ": ";
+ errs() << "'" << Src->getTargetTriple() << "' and '"
+ << Dest->getTargetTriple() << "'\n";
+ }
// Append the module inline asm string.
if (!Src->getModuleInlineAsm().empty()) {
@@ -1300,10 +1291,9 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
// If the source library's module id is in the dependent library list of the
// destination library, remove it since that module is now linked in.
- sys::Path modId;
- modId.set(Src->getModuleIdentifier());
- if (!modId.isEmpty())
- Dest->removeLibrary(modId.getBasename());
+ const std::string &modId = Src->getModuleIdentifier();
+ if (!modId.empty())
+ Dest->removeLibrary(sys::path::stem(modId));
return false;
}
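
Two of the rules introduced above are easy to state as predicates: visibility mismatches are only an error when both globals are real definitions and neither has available_externally linkage, and a linked global keeps unnamed_addr only if both inputs had it. A small self-contained sketch of those two rules (GlobalInfo is a stand-in struct, not llvm::GlobalValue):

#include <cassert>

struct GlobalInfo {
  int Visibility;              // e.g. 0 = default, 1 = hidden
  bool IsDeclaration;
  bool IsAvailableExternally;
  bool HasUnnamedAddr;
};

// Visibilities may only be reported as conflicting when both globals are real
// definitions and neither is available_externally.
static bool visibilityConflict(const GlobalInfo &Dst, const GlobalInfo &Src) {
  return Src.Visibility != Dst.Visibility &&
         !Src.IsDeclaration && !Dst.IsDeclaration &&
         !Src.IsAvailableExternally && !Dst.IsAvailableExternally;
}

// unnamed_addr survives linking only if both sides had it.
static bool mergedUnnamedAddr(const GlobalInfo &Dst, const GlobalInfo &Src) {
  return Src.HasUnnamedAddr && Dst.HasUnnamedAddr;
}

int main() {
  GlobalInfo Def    = { 0, false, false, true  };
  GlobalInfo Hidden = { 1, false, false, false };
  GlobalInfo AvExt  = { 1, false, true,  true  };
  assert(visibilityConflict(Def, Hidden));   // two defs, different visibility
  assert(!visibilityConflict(Def, AvExt));   // available_externally is exempt
  assert(!mergedUnnamedAddr(Def, Hidden));   // one side lacks unnamed_addr
  assert(mergedUnnamedAddr(Def, AvExt));
  return 0;
}
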
diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp
index 32aa0f9..fba91da 100644
--- a/contrib/llvm/lib/Linker/Linker.cpp
+++ b/contrib/llvm/lib/Linker/Linker.cpp
@@ -14,10 +14,11 @@
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
Linker::Linker(StringRef progname, StringRef modname,
@@ -97,13 +98,14 @@ std::auto_ptr<Module>
Linker::LoadObject(const sys::Path &FN) {
std::string ParseErrorMessage;
Module *Result = 0;
-
- std::auto_ptr<MemoryBuffer> Buffer(MemoryBuffer::getFileOrSTDIN(FN.c_str()));
- if (Buffer.get())
- Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
+ ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
+ + ec.message();
else
- ParseErrorMessage = "Error reading file '" + FN.str() + "'";
-
+ Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
if (Result)
return std::auto_ptr<Module>(Result);
Error = "Bitcode file '" + FN.str() + "' could not be loaded";
@@ -133,7 +135,7 @@ static inline sys::Path IsLibrary(StringRef Name,
// Try the libX.so (or .dylib) form
FullPath.eraseSuffix();
- FullPath.appendSuffix(&(LTDL_SHLIB_EXT[1]));
+ FullPath.appendSuffix(sys::Path::GetDLLSuffix());
if (FullPath.isDynamicLibrary()) // Native shared library?
return FullPath;
if (FullPath.isBitcodeFile()) // .so file containing bitcode?
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index cf35b45..8a00a16 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -11,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/ELFObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
@@ -20,6 +21,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
@@ -28,27 +30,76 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringSwitch.h"
#include "../Target/X86/X86FixupKinds.h"
+#include "../Target/ARM/ARMFixupKinds.h"
#include <vector>
using namespace llvm;
-namespace {
+static unsigned GetType(const MCSymbolData &SD) {
+ uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift;
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+ return Type;
+}
- class ELFObjectWriterImpl {
- static bool isFixupKindX86PCRel(unsigned Kind) {
- switch (Kind) {
- default:
- return false;
- case X86::reloc_pcrel_1byte:
- case X86::reloc_pcrel_4byte:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- return true;
- }
- }
+static unsigned GetBinding(const MCSymbolData &SD) {
+ uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ return Binding;
+}
+
+static void SetBinding(MCSymbolData &SD, unsigned Binding) {
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+ SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+static unsigned GetVisibility(MCSymbolData &SD) {
+ unsigned Visibility =
+ (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift;
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ return Visibility;
+}
+
+
+static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) {
+ switch (Variant) {
+ default:
+ return false;
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_PLT:
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_TLSLD:
+ return true;
+ }
+}
+
+static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI =
+ Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
+
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
+
+namespace {
+ class ELFObjectWriter : public MCObjectWriter {
+ protected:
/*static bool isFixupKindX86RIPRel(unsigned Kind) {
return Kind == X86::reloc_riprel_4byte ||
Kind == X86::reloc_riprel_4byte_movq_load;
@@ -64,6 +115,10 @@ namespace {
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
+ if (GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
return SymbolData->getSymbol().getName() <
RHS.SymbolData->getSymbol().getName();
}
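
GetBinding, SetBinding and GetVisibility above each read or update one nibble of the symbol's flags word at a fixed shift. The standalone sketch below shows that shift-and-mask pattern; the shift values here are placeholders, the real ones (ELF_STB_Shift and friends) come from the MCELFSymbolFlags.h header included above.

#include <cassert>
#include <cstdint>

static const unsigned STB_Shift = 8;    // assumed position of the binding nibble
static const unsigned STT_Shift = 4;    // assumed position of the type nibble
static const unsigned STV_Shift = 0;    // assumed position of the visibility nibble

static unsigned getBinding(uint32_t Flags)    { return (Flags >> STB_Shift) & 0xf; }
static unsigned getType(uint32_t Flags)       { return (Flags >> STT_Shift) & 0xf; }
static unsigned getVisibility(uint32_t Flags) { return (Flags >> STV_Shift) & 0xf; }

static uint32_t setBinding(uint32_t Flags, unsigned Binding) {
  // Clear the old nibble, then or in the new value, as SetBinding does.
  return (Flags & ~(0xfu << STB_Shift)) | (Binding << STB_Shift);
}

int main() {
  uint32_t Flags = 0;
  Flags = setBinding(Flags, 1 /* e.g. STB_GLOBAL */);
  Flags |= 2u << STT_Shift;             // e.g. STT_FUNC
  assert(getBinding(Flags) == 1);
  assert(getType(Flags) == 2);
  assert(getVisibility(Flags) == 0);    // default
  Flags = setBinding(Flags, 2 /* e.g. STB_WEAK */);
  assert(getBinding(Flags) == 2 && getType(Flags) == 2);
  return 0;
}
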
@@ -75,15 +130,33 @@ namespace {
struct ELFRelocationEntry {
// Make these big enough for both 32-bit and 64-bit
uint64_t r_offset;
- uint64_t r_info;
+ int Index;
+ unsigned Type;
+ const MCSymbol *Symbol;
uint64_t r_addend;
+ ELFRelocationEntry()
+ : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+ ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+ unsigned RelType, const MCSymbol *Sym,
+ uint64_t Addend)
+ : r_offset(RelocOffset), Index(Idx), Type(RelType),
+ Symbol(Sym), r_addend(Addend) {}
+
// Support lexicographic sorting.
bool operator<(const ELFRelocationEntry &RE) const {
return RE.r_offset < r_offset;
}
};
+ /// The target specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
llvm::DenseMap<const MCSectionData*,
std::vector<ELFRelocationEntry> > Relocations;
DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
@@ -99,49 +172,52 @@ namespace {
/// @}
- ELFObjectWriter *Writer;
-
- raw_ostream &OS;
-
- // This holds the current offset into the object file.
- size_t FileOff;
-
- unsigned Is64Bit : 1;
+ bool NeedsGOT;
- bool HasRelocationAddend;
+ bool NeedsSymtabShndx;
// This holds the symbol table index of the last local symbol.
unsigned LastLocalSymbolIndex;
// This holds the .strtab section index.
unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
unsigned ShstrtabIndex;
+
+ const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F) const;
+
+ // For arch-specific emission of explicit reloc symbol
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ bool IsBSS) const {
+ return NULL;
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+
public:
- ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit,
- bool _HasRelAddend)
- : Writer(_Writer), OS(Writer->getStream()), FileOff(0),
- Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) {
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+        NeedsGOT(false), NeedsSymtabShndx(false) {
}
- void Write8(uint8_t Value) { Writer->Write8(Value); }
- void Write16(uint16_t Value) { Writer->Write16(Value); }
- void Write32(uint32_t Value) { Writer->Write32(Value); }
- //void Write64(uint64_t Value) { Writer->Write64(Value); }
- void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
- //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
- // Writer->WriteBytes(Str, ZeroFillSize);
- //}
+ virtual ~ELFObjectWriter();
void WriteWord(uint64_t W) {
- if (Is64Bit)
- Writer->Write64(W);
+ if (is64Bit())
+ Write64(W);
else
- Writer->Write32(W);
- }
-
- void String8(char *buf, uint8_t Value) {
- buf[0] = Value;
+ Write32(W);
}
void StringLE16(char *buf, uint16_t Value) {
@@ -174,86 +250,191 @@ namespace {
StringBE32(buf + 4, uint32_t(Value >> 0));
}
- void String16(char *buf, uint16_t Value) {
- if (Writer->isLittleEndian())
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents() += StringRef(buf, 1);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
StringLE16(buf, Value);
else
StringBE16(buf, Value);
+ F.getContents() += StringRef(buf, 2);
}
- void String32(char *buf, uint32_t Value) {
- if (Writer->isLittleEndian())
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
StringLE32(buf, Value);
else
StringBE32(buf, Value);
+ F.getContents() += StringRef(buf, 4);
}
- void String64(char *buf, uint64_t Value) {
- if (Writer->isLittleEndian())
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
StringLE64(buf, Value);
else
StringBE64(buf, Value);
+ F.getContents() += StringRef(buf, 8);
}
- void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+ virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ /// Default e_flags = 0
+ virtual void WriteEFlags() { Write32(0); }
- void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info,
+ virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
uint64_t value, uint64_t size,
- uint8_t other, uint16_t shndx);
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
- void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
const MCAsmLayout &Layout);
- void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm,
- const MCAsmLayout &Layout);
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
+ virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
- uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
const MCSymbol *S);
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+
/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
/// \param StringIndexMap [out] - Map from symbol names to offsets in the
/// string table.
- void ComputeSymbolTable(MCAssembler &Asm);
+ virtual void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap);
- void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ virtual void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap);
+
+ virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
const MCSectionData &SD);
- void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+ virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
WriteRelocation(Asm, Layout, *it);
}
}
- void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout);
+ virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
- void ExecutePostLayoutBinding(MCAssembler &Asm) {
- // Compute symbol table information.
- ComputeSymbolTable(Asm);
- }
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap);
- void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
uint64_t Address, uint64_t Offset,
uint64_t Size, uint32_t Link, uint32_t Info,
uint64_t Alignment, uint64_t EntrySize);
- void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F,
- const MCSectionData *SD);
+ virtual void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ virtual void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) = 0;
+ };
+
+ //===- X86ELFObjectWriter -------------------------------------------===//
+
+ class X86ELFObjectWriter : public ELFObjectWriter {
+ public:
+ X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
+
+
+ //===- ARMELFObjectWriter -------------------------------------------===//
+
+ class ARMELFObjectWriter : public ELFObjectWriter {
+ public:
+    // FIXME: MCAssembler can't yet return the Subtarget.
+ enum { DefaultEABIVersion = 0x05000000U };
+
+ ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
- void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ virtual ~ARMELFObjectWriter();
+
+ virtual void WriteEFlags();
+ protected:
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ bool IsBSS) const;
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
};
+ //===- MBlazeELFObjectWriter -------------------------------------------===//
+
+ class MBlazeELFObjectWriter : public ELFObjectWriter {
+ public:
+ MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend);
+ };
}
+ELFObjectWriter::~ELFObjectWriter() {}
+
// Emit the ELF header.
-void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
- unsigned NumberOfSections) {
+void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
// ELF Header
// ----------
//
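
The new String8/String16/String32/String64 helpers in the hunk above serialize an integer into a small char buffer in the writer's byte order and append it to a fragment's contents, rather than writing through the output stream. A self-contained sketch of that encode-and-append step, with std::string standing in for MCDataFragment:

#include <cassert>
#include <cstdint>
#include <string>

static void emit32(std::string &Out, uint32_t V, bool LittleEndian) {
  char Buf[4];
  if (LittleEndian)
    for (int i = 0; i != 4; ++i) Buf[i] = char((V >> (8 * i)) & 0xff);
  else
    for (int i = 0; i != 4; ++i) Buf[i] = char((V >> (8 * (3 - i))) & 0xff);
  Out.append(Buf, 4);                   // same idea as F.getContents() += StringRef(buf, 4)
}

int main() {
  std::string LE, BE;
  emit32(LE, 0x11223344u, true);
  emit32(BE, 0x11223344u, false);
  assert(LE.size() == 4 && BE.size() == 4);
  assert(LE[0] == 0x44 && LE[3] == 0x11);  // least significant byte first
  assert(BE[0] == 0x11 && BE[3] == 0x44);  // most significant byte first
  return 0;
}
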
@@ -267,140 +448,193 @@ void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
Write8('L'); // e_ident[EI_MAG2]
Write8('F'); // e_ident[EI_MAG3]
- Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+ Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
// e_ident[EI_DATA]
- Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+ Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
- Write8(ELF::ELFOSABI_LINUX); // e_ident[EI_OSABI]
+ // e_ident[EI_OSABI]
+ switch (TargetObjectWriter->getOSType()) {
+ case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break;
+ case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break;
+ default: Write8(ELF::ELFOSABI_NONE); break;
+ }
Write8(0); // e_ident[EI_ABIVERSION]
WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
Write16(ELF::ET_REL); // e_type
- // FIXME: Make this configurable
- Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target
+ Write16(TargetObjectWriter->getEMachine()); // e_machine = target
Write32(ELF::EV_CURRENT); // e_version
WriteWord(0); // e_entry, no entry point in .o file
WriteWord(0); // e_phoff, no program header for .o
- WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) :
+ WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
- // FIXME: Make this configurable.
- Write32(0); // e_flags = whatever the target wants
+ // e_flags = whatever the target wants
+ WriteEFlags();
// e_ehsize = ELF header size
- Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
Write16(0); // e_phentsize = prog header entry size
Write16(0); // e_phnum = # prog header entries = 0
// e_shentsize = Section header entry size
- Write16(Is64Bit ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+ Write16(is64Bit() ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
// e_shnum = # of section header ents
- Write16(NumberOfSections);
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(0);
+ else
+ Write16(NumberOfSections);
// e_shstrndx = Section # of '.shstrtab'
- Write16(ShstrtabIndex);
+ if (NumberOfSections >= ELF::SHN_LORESERVE)
+ Write16(ELF::SHN_XINDEX);
+ else
+ Write16(ShstrtabIndex);
}
-void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name,
- uint8_t info, uint64_t value,
- uint64_t size, uint8_t other,
- uint16_t shndx) {
- if (Is64Bit) {
- char buf[8];
+void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint32_t shndx,
+ bool Reserved) {
+ if (ShndxF) {
+ if (shndx >= ELF::SHN_LORESERVE && !Reserved)
+ String32(*ShndxF, shndx);
+ else
+ String32(*ShndxF, 0);
+ }
- String32(buf, name);
- F->getContents() += StringRef(buf, 4); // st_name
+ uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ?
+ uint16_t(ELF::SHN_XINDEX) : shndx;
- String8(buf, info);
- F->getContents() += StringRef(buf, 1); // st_info
+ if (is64Bit()) {
+ String32(*SymtabF, name); // st_name
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ String64(*SymtabF, value); // st_value
+ String64(*SymtabF, size); // st_size
+ } else {
+ String32(*SymtabF, name); // st_name
+ String32(*SymtabF, value); // st_value
+ String32(*SymtabF, size); // st_size
+ String8(*SymtabF, info); // st_info
+ String8(*SymtabF, other); // st_other
+ String16(*SymtabF, Index); // st_shndx
+ }
+}
- String8(buf, other);
- F->getContents() += StringRef(buf, 1); // st_other
+static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) {
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonAlignment();
- String16(buf, shndx);
- F->getContents() += StringRef(buf, 2); // st_shndx
+ const MCSymbol &Symbol = Data.getSymbol();
- String64(buf, value);
- F->getContents() += StringRef(buf, 8); // st_value
+ if (Symbol.isAbsolute() && Symbol.isVariable()) {
+ if (const MCExpr *Value = Symbol.getVariableValue()) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, Layout))
+ return (uint64_t)IntValue;
+ }
+ }
- String64(buf, size);
- F->getContents() += StringRef(buf, 8); // st_size
- } else {
- char buf[4];
+ if (!Symbol.isInSection())
+ return 0;
+
+ if (Data.getFragment())
+ return Layout.getSymbolOffset(&Data);
+
+ return 0;
+}
- String32(buf, name);
- F->getContents() += StringRef(buf, 4); // st_name
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The presence of symbol versions causes undefined symbols and
+ // versions declared with @@@ to be renamed.
- String32(buf, value);
- F->getContents() += StringRef(buf, 4); // st_value
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Alias = it->getSymbol();
+ const MCSymbol &Symbol = Alias.AliasedSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ // Not an alias.
+ if (&Symbol == &Alias)
+ continue;
+
+ StringRef AliasName = Alias.getName();
+ size_t Pos = AliasName.find('@');
+ if (Pos == StringRef::npos)
+ continue;
- String32(buf, size);
- F->getContents() += StringRef(buf, 4); // st_size
+    // Aliases defined with .symver copy the binding from the symbol they alias.
+ // This is the first place we are able to copy this information.
+ it->setExternal(SD.isExternal());
+ SetBinding(*it, GetBinding(SD));
- String8(buf, info);
- F->getContents() += StringRef(buf, 1); // st_info
+ StringRef Rest = AliasName.substr(Pos);
+ if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
+ continue;
- String8(buf, other);
- F->getContents() += StringRef(buf, 1); // st_other
+ // FIXME: produce a better error message.
+ if (Symbol.isUndefined() && Rest.startswith("@@") &&
+ !Rest.startswith("@@@"))
+ report_fatal_error("A @@ version cannot be undefined");
- String16(buf, shndx);
- F->getContents() += StringRef(buf, 2); // st_shndx
+ Renames.insert(std::make_pair(&Symbol, &Alias));
}
}
-void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
- const MCAsmLayout &Layout) {
- MCSymbolData &Data = *MSD.SymbolData;
- uint8_t Info = (Data.getFlags() & 0xff);
- uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift);
- uint64_t Value = 0;
+void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &OrigData = *MSD.SymbolData;
+ MCSymbolData &Data =
+ Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol());
+
+ bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() ||
+ Data.getSymbol().isVariable();
+
+ uint8_t Binding = GetBinding(OrigData);
+ uint8_t Visibility = GetVisibility(OrigData);
+ uint8_t Type = GetType(Data);
+
+ uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
+ uint8_t Other = Visibility;
+
+ uint64_t Value = SymbolValue(Data, Layout);
uint64_t Size = 0;
- const MCExpr *ESize;
- if (Data.isCommon() && Data.isExternal())
- Value = Data.getCommonAlignment();
-
- if (!Data.isCommon())
- if (MCFragment *FF = Data.getFragment())
- Value = Layout.getSymbolAddress(&Data) -
- Layout.getSectionAddress(FF->getParent());
-
- ESize = Data.getSize();
- if (Data.getSize()) {
- MCValue Res;
- if (ESize->getKind() == MCExpr::Binary) {
- const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize);
-
- if (BE->EvaluateAsRelocatable(Res, &Layout)) {
- MCSymbolData &A =
- Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol());
- MCSymbolData &B =
- Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol());
-
- Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B);
- }
- } else if (ESize->getKind() == MCExpr::Constant) {
- Size = static_cast<const MCConstantExpr *>(ESize)->getValue();
- } else {
- assert(0 && "Unsupported size expression");
- }
+ assert(!(Data.isCommon() && !Data.isExternal()));
+
+ const MCExpr *ESize = Data.getSize();
+ if (ESize) {
+ int64_t Res;
+ if (!ESize->EvaluateAsAbsolute(Res, Layout))
+ report_fatal_error("Size expression must be absolute.");
+ Size = Res;
}
// Write out the symbol table entry
- WriteSymbolEntry(F, MSD.StringIndex, Info, Value,
- Size, Other, MSD.SectionIndex);
+ WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex, IsReserved);
}
-void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
assert(StringTable.size() && "Missing string table");
@@ -408,258 +642,343 @@ void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
// FIXME: Make sure the start of the symbol table is aligned.
// The first entry is the undefined symbol entry.
- unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
- F->getContents().append(EntrySize, '\x00');
+ WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
// Write the symbol table entries.
LastLocalSymbolIndex = LocalSymbolData.size() + 1;
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = LocalSymbolData[i];
- WriteSymbol(F, MSD, Layout);
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
}
- // Write out a symbol table entry for each section.
- // leaving out the just added .symtab which is at
- // the very end
- unsigned Index = 1;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it, ++Index) {
+ // Write out a symbol table entry for each regular section.
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+ ++i) {
const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(it->getSection());
- // Leave out relocations so we don't have indexes within
- // the relocations messed up
- if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL)
- continue;
- if (Index == Asm.size())
+ static_cast<const MCSectionELF&>(i->getSection());
+ if (Section.getType() == ELF::SHT_RELA ||
+ Section.getType() == ELF::SHT_REL ||
+ Section.getType() == ELF::SHT_STRTAB ||
+ Section.getType() == ELF::SHT_SYMTAB)
continue;
- WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index);
+ WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
LastLocalSymbolIndex++;
}
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = ExternalSymbolData[i];
MCSymbolData &Data = *MSD.SymbolData;
- assert((Data.getFlags() & ELF_STB_Global) &&
- "External symbol requires STB_GLOBAL flag");
- WriteSymbol(F, MSD, Layout);
- if (Data.getFlags() & ELF_STB_Local)
+ assert(((Data.getFlags() & ELF_STB_Global) ||
+ (Data.getFlags() & ELF_STB_Weak)) &&
+ "External symbol requires STB_GLOBAL or STB_WEAK flag");
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (GetBinding(Data) == ELF::STB_LOCAL)
LastLocalSymbolIndex++;
}
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
ELFSymbolData &MSD = UndefinedSymbolData[i];
MCSymbolData &Data = *MSD.SymbolData;
- Data.setFlags(Data.getFlags() | ELF_STB_Global);
- WriteSymbol(F, MSD, Layout);
- if (Data.getFlags() & ELF_STB_Local)
+ WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+ if (GetBinding(Data) == ELF::STB_LOCAL)
LastLocalSymbolIndex++;
}
}
-// FIXME: this is currently X86/X86_64 only
-void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ const MCSymbol *Renamed = Renames.lookup(&Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+ if (ASymbol.isUndefined()) {
+ if (Renamed)
+ return Renamed;
+ return &ASymbol;
+ }
+
+ if (SD.isExternal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(ASymbol.getSection());
+ const SectionKind secKind = Section.getKind();
+
+ if (secKind.isBSS())
+ return ExplicitRelSym(Asm, Target, F, true);
+
+ if (secKind.isThreadLocal()) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ const MCSectionELF &Sec2 =
+ static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+ if (&Sec2 != &Section &&
+ (Kind == MCSymbolRefExpr::VK_PLT ||
+ Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+ Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ if (Section.getFlags() & ELF::SHF_MERGE) {
+ if (Target.getConstant() == 0)
+ return NULL;
+ if (Renamed)
+ return Renamed;
+ return &Symbol;
+ }
+
+ return ExplicitRelSym(Asm, Target, F, false);
+}
+
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
int64_t Addend = 0;
- unsigned Index = 0;
+ int Index = 0;
int64_t Value = Target.getConstant();
+ const MCSymbol *RelocSymbol = NULL;
+ bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
if (!Target.isAbsolute()) {
- const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
- MCFragment *F = SD.getFragment();
-
- if (Base) {
- if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) {
- Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
- Value += Layout.getSymbolAddress(&SD);
- } else
- Index = getSymbolIndexInSymbolTable(Asm, Symbol);
- if (Base != &SD)
- Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
- Addend = Value;
- // Compensate for the addend on i386.
- if (Is64Bit)
- Value = 0;
- } else {
- if (F) {
- // Index of the section in .symtab against this symbol
- // is being relocated + 2 (empty section + abs. symbols).
- Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
-
- MCSectionData *FSD = F->getParent();
- // Offset of the symbol in the section
- Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD);
- } else {
- FixedValue = Value;
- return;
- }
- }
- }
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+ RelocSymbol = SymbolToReloc(Asm, Target, *Fragment);
- FixedValue = Value;
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ const MCSymbol &SymbolB = RefB->getSymbol();
+ MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+ IsPCRel = true;
- // determine the type of the relocation
- bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind());
- unsigned Type;
- if (Is64Bit) {
- if (IsPCRel) {
- Type = ELF::R_X86_64_PC32;
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_8: Type = ELF::R_X86_64_64; break;
- case X86::reloc_pcrel_4byte:
- case FK_Data_4:
- // check that the offset fits within a signed long
- if (isInt<32>(Target.getConstant()))
- Type = ELF::R_X86_64_32S;
- else
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case X86::reloc_pcrel_1byte:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
+ // Offset of the symbol in the section
+ int64_t a = Layout.getSymbolOffset(&SDB);
+
+      // Offset of the relocation in the section
+ int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ Value += b - a;
}
- } else {
- if (IsPCRel) {
- Type = ELF::R_386_PC32;
+
+ if (!RelocSymbol) {
+ MCSymbolData &SD = Asm.getSymbolData(ASymbol);
+ MCFragment *F = SD.getFragment();
+
+ Index = F->getParent()->getOrdinal() + 1;
+
+ // Offset of the symbol in the section
+ Value += Layout.getSymbolOffset(&SD);
} else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case X86::reloc_pcrel_4byte:
- case FK_Data_4: Type = ELF::R_386_32; break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case X86::reloc_pcrel_1byte:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
+ if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref)
+ WeakrefUsedInReloc.insert(RelocSymbol);
+ else
+ UsedInReloc.insert(RelocSymbol);
+ Index = -1;
}
+ Addend = Value;
+ // Compensate for the addend on i386.
+ if (is64Bit())
+ Value = 0;
}
- ELFRelocationEntry ERE;
+ FixedValue = Value;
+ unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
+ (RelocSymbol != 0), Addend);
+
+ uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
+ Fixup.getOffset();
+
+ if (!hasRelocationAddend())
+ Addend = 0;
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
- if (Is64Bit) {
- struct ELF::Elf64_Rela ERE64;
- ERE64.setSymbolAndType(Index, Type);
- ERE.r_info = ERE64.r_info;
- } else {
- struct ELF::Elf32_Rela ERE32;
- ERE32.setSymbolAndType(Index, Type);
- ERE.r_info = ERE32.r_info;
- }
- ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+uint64_t
+ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+ return SD.getIndex();
+}
- if (HasRelocationAddend)
- ERE.r_addend = Addend;
- else
- ERE.r_addend = 0; // Silence compiler warning.
+static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed) {
+ if (Data.getFlags() & ELF_Other_Weakref)
+ return false;
- Relocations[Fragment->getParent()].push_back(ERE);
+ if (Used)
+ return true;
+
+ if (Renamed)
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+
+ if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
+ return true;
+
+ const MCSymbol &A = Symbol.AliasedSymbol();
+ if (!A.isVariable() && A.isUndefined() && !Data.isCommon())
+ return false;
+
+ if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined())
+ return false;
+
+ if (Symbol.isTemporary())
+ return false;
+
+ return true;
}
-uint64_t
-ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
- const MCSymbol *S) {
- MCSymbolData &SD = Asm.getSymbolData(*S);
+static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc) {
+ if (Data.isExternal())
+ return false;
+
+ const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
- // Local symbol.
- if (!SD.isExternal() && !S->isUndefined())
- return SD.getIndex() + /* empty symbol */ 1;
+ if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
+ if (isSignature && !isUsedInReloc)
+ return true;
- // External or undefined symbol.
- return SD.getIndex() + Asm.size() + /* empty symbol */ 1;
+ return false;
+ }
+
+ return true;
}
-void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
- // Build section lookup table.
- DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap) {
unsigned Index = 1;
for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it, ++Index)
- SectionIndexMap[&it->getSection()] = Index;
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() != ELF::SHT_GROUP)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ }
+
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(it->getSection());
+ if (Section.getType() == ELF::SHT_GROUP)
+ continue;
+ SectionIndexMap[&Section] = Index++;
+ }
+}
+
+void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap) {
+ // FIXME: Is this the correct place to do this?
+ if (NeedsGOT) {
+ llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_";
+ MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name);
+ MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym);
+ Data.setExternal(true);
+ SetBinding(Data, ELF::STB_GLOBAL);
+ }
+
+ // Build section lookup table.
+ int NumRegularSections = Asm.size();
// Index 0 is always the empty string.
StringMap<uint64_t> StringIndexMap;
StringTable += '\x00';
- // Add the data for local symbols.
+ // Add the data for the symbols.
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
ie = Asm.symbol_end(); it != ie; ++it) {
const MCSymbol &Symbol = it->getSymbol();
- // Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(Symbol))
- continue;
+ bool Used = UsedInReloc.count(&Symbol);
+ bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
+ bool isSignature = RevGroupMap.count(&Symbol);
- if (it->isExternal() || Symbol.isUndefined())
+ if (!isInSymtab(Asm, *it,
+ Used || WeakrefUsed || isSignature,
+ Renames.count(&Symbol)))
continue;
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Symbol.getName();
- StringTable += '\x00';
- }
-
ELFSymbolData MSD;
MSD.SymbolData = it;
- MSD.StringIndex = Entry;
+ const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
+
+ // Undefined symbols are global, but this is the first place we
+ // are able to set it.
+ bool Local = isLocal(*it, isSignature, Used);
+ if (!Local && GetBinding(*it) == ELF::STB_LOCAL) {
+ MCSymbolData &SD = Asm.getSymbolData(RefSymbol);
+ SetBinding(*it, ELF::STB_GLOBAL);
+ SetBinding(SD, ELF::STB_GLOBAL);
+ }
+
+ if (RefSymbol.isUndefined() && !Used && WeakrefUsed)
+ SetBinding(*it, ELF::STB_WEAK);
- if (Symbol.isAbsolute()) {
+ if (it->isCommon()) {
+ assert(!Local);
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) {
MSD.SectionIndex = ELF::SHN_ABS;
- LocalSymbolData.push_back(MSD);
+ } else if (RefSymbol.isUndefined()) {
+ if (isSignature && !Used)
+ MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]);
+ else
+ MSD.SectionIndex = ELF::SHN_UNDEF;
} else {
- MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(RefSymbol.getSection());
+ MSD.SectionIndex = SectionIndexMap.lookup(&Section);
+ if (MSD.SectionIndex >= ELF::SHN_LORESERVE)
+ NeedsSymtabShndx = true;
assert(MSD.SectionIndex && "Invalid section index!");
- LocalSymbolData.push_back(MSD);
}
- }
-
- // Now add non-local symbols.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
- // Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(Symbol))
- continue;
-
- if (!it->isExternal() && !Symbol.isUndefined())
- continue;
+ // The @@@ in symbol version is replaced with @ in undefined symbols and
+ // @@ in defined ones.
+ StringRef Name = Symbol.getName();
+ SmallString<32> Buf;
+
+ size_t Pos = Name.find("@@@");
+ if (Pos != StringRef::npos) {
+ Buf += Name.substr(0, Pos);
+ unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
+ Buf += Name.substr(Pos + Skip);
+ Name = Buf;
+ }
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ uint64_t &Entry = StringIndexMap[Name];
if (!Entry) {
Entry = StringTable.size();
- StringTable += Symbol.getName();
+ StringTable += Name;
StringTable += '\x00';
}
-
- ELFSymbolData MSD;
- MSD.SymbolData = it;
MSD.StringIndex = Entry;
-
- if (Symbol.isUndefined()) {
- MSD.SectionIndex = ELF::SHN_UNDEF;
- // XXX: for some reason we dont Emit* this
- it->setFlags(it->getFlags() | ELF_STB_Global);
+ if (MSD.SectionIndex == ELF::SHN_UNDEF)
UndefinedSymbolData.push_back(MSD);
- } else if (Symbol.isAbsolute()) {
- MSD.SectionIndex = ELF::SHN_ABS;
- ExternalSymbolData.push_back(MSD);
- } else if (it->isCommon()) {
- MSD.SectionIndex = ELF::SHN_COMMON;
- ExternalSymbolData.push_back(MSD);
- } else {
- MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
- assert(MSD.SectionIndex && "Invalid section index!");
+ else if (Local)
+ LocalSymbolData.push_back(MSD);
+ else
ExternalSymbolData.push_back(MSD);
- }
}
// Symbols are required to be in lexicographic order.
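
When the string table is built above, a versioned name containing "@@@" is rewritten before insertion: undefined symbols keep a single "@" and defined symbols keep "@@". A tiny standalone sketch of that rewrite (std::string in place of StringRef/SmallString):

#include <cassert>
#include <string>

static std::string rewriteVersion(const std::string &Name, bool IsUndefined) {
  std::string::size_type Pos = Name.find("@@@");
  if (Pos == std::string::npos)
    return Name;
  // Skip two of the three '@' for undefined symbols and one for defined ones,
  // mirroring: unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1.
  std::string::size_type Skip = IsUndefined ? 2 : 1;
  return Name.substr(0, Pos) + Name.substr(Pos + Skip);
}

int main() {
  assert(rewriteVersion("foo@@@GLIBC_2.4", true)  == "foo@GLIBC_2.4");
  assert(rewriteVersion("foo@@@GLIBC_2.4", false) == "foo@@GLIBC_2.4");
  assert(rewriteVersion("plain", true) == "plain");
  return 0;
}
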
@@ -669,55 +988,56 @@ void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
// Set the symbol indices. Local symbols must come before all other
// symbols with non-local bindings.
- Index = 0;
+ unsigned Index = 1;
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
LocalSymbolData[i].SymbolData->setIndex(Index++);
+
+ Index += NumRegularSections;
+
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
ExternalSymbolData[i].SymbolData->setIndex(Index++);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
UndefinedSymbolData[i].SymbolData->setIndex(Index++);
}
-void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
- const MCSectionData &SD) {
+void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD) {
if (!Relocations[&SD].empty()) {
MCContext &Ctx = Asm.getContext();
- const MCSection *RelaSection;
+ const MCSectionELF *RelaSection;
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(SD.getSection());
const StringRef SectionName = Section.getSectionName();
- std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel";
+ std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel";
RelaSectionName += SectionName;
unsigned EntrySize;
- if (HasRelocationAddend)
- EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ if (hasRelocationAddend())
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
else
- EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+ EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
- RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ?
+ RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
ELF::SHT_RELA : ELF::SHT_REL, 0,
SectionKind::getReadOnly(),
- false, EntrySize);
+ EntrySize, "");
MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
- RelaSD.setAlignment(1);
+ RelaSD.setAlignment(is64Bit() ? 8 : 4);
MCDataFragment *F = new MCDataFragment(&RelaSD);
WriteRelocationsFragment(Asm, F, &SD);
-
- Asm.AddSectionToTheEnd(RelaSD, Layout);
}
}
-void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
- uint64_t Flags, uint64_t Address,
- uint64_t Offset, uint64_t Size,
- uint32_t Link, uint32_t Info,
- uint64_t Alignment,
- uint64_t EntrySize) {
+void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
Write32(Name); // sh_name: index into string table
Write32(Type); // sh_type
WriteWord(Flags); // sh_flags
@@ -730,9 +1050,9 @@ void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
WriteWord(EntrySize); // sh_entsize
}
-void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
- MCDataFragment *F,
- const MCSectionData *SD) {
+void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
// sort by the r_offset just like gnu as does
array_pod_sort(Relocs.begin(), Relocs.end());
@@ -740,67 +1060,90 @@ void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
ELFRelocationEntry entry = Relocs[e - i - 1];
- unsigned WordSize = Is64Bit ? 8 : 4;
- F->getContents() += StringRef((const char *)&entry.r_offset, WordSize);
- F->getContents() += StringRef((const char *)&entry.r_info, WordSize);
+ if (!entry.Index)
+ ;
+ else if (entry.Index < 0)
+ entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
+ else
+ entry.Index += LocalSymbolData.size();
+ if (is64Bit()) {
+ String64(*F, entry.r_offset);
+
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(entry.Index, entry.Type);
+ String64(*F, ERE64.r_info);
- if (HasRelocationAddend)
- F->getContents() += StringRef((const char *)&entry.r_addend, WordSize);
+ if (hasRelocationAddend())
+ String64(*F, entry.r_addend);
+ } else {
+ String32(*F, entry.r_offset);
+
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(entry.Index, entry.Type);
+ String32(*F, ERE32.r_info);
+
+ if (hasRelocationAddend())
+ String32(*F, entry.r_addend);
+ }
}
}
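
  setSymbolAndType above packs the symbol index and relocation type into r_info using
  the standard ELF encodings. A small sketch of those packings, assuming the usual
  Elf32_Rel/Elf64_Rela layouts:

  #include <cstdint>

  // ELF64: r_info = (sym << 32) | (type & 0xffffffff)
  static uint64_t elf64RInfo(uint32_t SymIndex, uint32_t Type) {
    return (uint64_t(SymIndex) << 32) | Type;
  }

  // ELF32: r_info = (sym << 8) | (type & 0xff)
  static uint32_t elf32RInfo(uint32_t SymIndex, uint8_t Type) {
    return (SymIndex << 8) | Type;
  }
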
-void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
- MCAsmLayout &Layout) {
+void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap) {
MCContext &Ctx = Asm.getContext();
MCDataFragment *F;
- WriteRelocations(Asm, Layout);
-
- const MCSection *SymtabSection;
- unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+ unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
- SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
- SectionKind::getReadOnly(),
- false, EntrySize);
+ // We construct .shstrtab, .symtab and .strtab in this order to match gnu as.
+ const MCSectionELF *ShstrtabSection =
+ Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly());
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+ ShstrtabIndex = Asm.size();
+ const MCSectionELF *SymtabSection =
+ Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ EntrySize, "");
MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+ SymtabSD.setAlignment(is64Bit() ? 8 : 4);
+ SymbolTableIndex = Asm.size();
- SymtabSD.setAlignment(Is64Bit ? 8 : 4);
+ MCSectionData *SymtabShndxSD = NULL;
- F = new MCDataFragment(&SymtabSD);
-
- // Symbol table
- WriteSymbolTable(F, Asm, Layout);
- Asm.AddSectionToTheEnd(SymtabSD, Layout);
+ if (NeedsSymtabShndx) {
+ const MCSectionELF *SymtabShndxSection =
+ Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0,
+ SectionKind::getReadOnly(), 4, "");
+ SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection);
+ SymtabShndxSD->setAlignment(4);
+ }
const MCSection *StrtabSection;
StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
- SectionKind::getReadOnly(), false);
-
+ SectionKind::getReadOnly());
MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
StrtabSD.setAlignment(1);
-
- // FIXME: This isn't right. If the sections get rearranged this will
- // be wrong. We need a proper lookup.
StringTableIndex = Asm.size();
- F = new MCDataFragment(&StrtabSD);
- F->getContents().append(StringTable.begin(), StringTable.end());
- Asm.AddSectionToTheEnd(StrtabSD, Layout);
+ WriteRelocations(Asm, Layout);
- const MCSection *ShstrtabSection;
- ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
- SectionKind::getReadOnly(), false);
+ // Symbol table
+ F = new MCDataFragment(&SymtabSD);
+ MCDataFragment *ShndxF = NULL;
+ if (NeedsSymtabShndx) {
+ ShndxF = new MCDataFragment(SymtabShndxSD);
+ }
+ WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap);
- MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
- ShstrtabSD.setAlignment(1);
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
F = new MCDataFragment(&ShstrtabSD);
- // FIXME: This isn't right. If the sections get rearranged this will
- // be wrong. We need a proper lookup.
- ShstrtabIndex = Asm.size();
-
// Section header string table.
//
// The first entry of a string table holds a null character so skip
@@ -808,166 +1151,691 @@ void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
uint64_t Index = 1;
F->getContents() += '\x00';
+ StringMap<uint64_t> SecStringMap;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(it->getSection());
+ // FIXME: We could merge suffixes like in .text and .rela.text.
+ StringRef Name = Section.getSectionName();
+ if (SecStringMap.count(Name)) {
+ SectionStringTableIndex[&Section] = SecStringMap[Name];
+ continue;
+ }
// Remember the index into the string table so we can write it
// into the sh_name field of the section header table.
- SectionStringTableIndex[&it->getSection()] = Index;
+ SectionStringTableIndex[&Section] = Index;
+ SecStringMap[Name] = Index;
- Index += Section.getSectionName().size() + 1;
- F->getContents() += Section.getSectionName();
+ Index += Name.size() + 1;
+ F->getContents() += Name;
F->getContents() += '\x00';
}
+}
+
+void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
+ MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap) {
+ // Create the .note.GNU-stack section if needed.
+ MCContext &Ctx = Asm.getContext();
+ if (Asm.getNoExecStack()) {
+ const MCSectionELF *GnuStackSection =
+ Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ Asm.getOrCreateSectionData(*GnuStackSection);
+ }
+
+ // Build the groups
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+
+ const MCSymbol *SignatureSymbol = Section.getGroup();
+ Asm.getOrCreateSymbolData(*SignatureSymbol);
+ const MCSectionELF *&Group = RevGroupMap[SignatureSymbol];
+ if (!Group) {
+ Group = Ctx.CreateELFGroupSection();
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ Data.setAlignment(4);
+ MCDataFragment *F = new MCDataFragment(&Data);
+ String32(*F, ELF::GRP_COMDAT);
+ }
+ GroupMap[Group] = SignatureSymbol;
+ }
+
+ // Add sections to the groups
+ unsigned Index = 1;
+ unsigned NumGroups = RevGroupMap.size();
+ for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
+ it != ie; ++it, ++Index) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ if (!(Section.getFlags() & ELF::SHF_GROUP))
+ continue;
+ const MCSectionELF *Group = RevGroupMap[Section.getGroup()];
+ MCSectionData &Data = Asm.getOrCreateSectionData(*Group);
+ // FIXME: we could use the previous fragment
+ MCDataFragment *F = new MCDataFragment(&Data);
+ String32(*F, NumGroups + Index);
+ }
+}
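
  The fragments created above give each SHT_GROUP section the layout the ELF spec
  prescribes: one 32-bit flags word (GRP_COMDAT here) followed by one 32-bit section
  index per member. A minimal sketch of building such a payload as a word vector:

  #include <cstdint>
  #include <vector>

  // Build the raw contents of a SHT_GROUP section: flags word, then member indices.
  static std::vector<uint32_t> buildGroupPayload(const std::vector<uint32_t> &Members) {
    std::vector<uint32_t> Words;
    Words.push_back(1 /* GRP_COMDAT */);
    Words.insert(Words.end(), Members.begin(), Members.end());
    return Words;
  }
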
+
+void ELFObjectWriter::WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size,
+ uint64_t Alignment,
+ const MCSectionELF &Section) {
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&Section];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSectionELF *SymtabSection;
+ const MCSectionELF *InfoSection;
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB,
+ 0,
+ SectionKind::getReadOnly());
+ sh_link = SectionIndexMap.lookup(SymtabSection);
+ assert(sh_link && ".symtab not found");
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ sh_info = SectionIndexMap.lookup(InfoSection);
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_SYMTAB_SHNDX:
+ sh_link = SymbolTableIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NOTE:
+ case ELF::SHT_NULL:
+ case ELF::SHT_ARM_ATTRIBUTES:
+ case ELF::SHT_INIT_ARRAY:
+ case ELF::SHT_FINI_ARRAY:
+ case ELF::SHT_PREINIT_ARRAY:
+ case ELF::SHT_X86_64_UNWIND:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_GROUP: {
+ sh_link = SymbolTableIndex;
+ sh_info = GroupSymbolIndex;
+ break;
+ }
+
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
- Asm.AddSectionToTheEnd(ShstrtabSD, Layout);
+ WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+ Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
+ Alignment, Section.getEntrySize());
}
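
  The switch above encodes the usual sh_link/sh_info conventions: relocation sections
  link to the symbol table and carry the relocated section's index in sh_info, the
  symbol table links to its string table and carries one past the last local symbol,
  and a group section links to the symbol table and carries the index of its signature
  symbol. A condensed restatement with hypothetical index parameters:

  #include <cstdint>
  #include <utility>

  // Returns {sh_link, sh_info} for a few common section types (hypothetical helper).
  static std::pair<uint32_t, uint32_t>
  linkAndInfoFor(uint32_t Type, uint32_t SymtabIdx, uint32_t StrtabIdx,
                 uint32_t TargetIdx, uint32_t FirstNonLocalSym, uint32_t SignatureSym) {
    switch (Type) {
    case 9 /* SHT_REL */:
    case 4 /* SHT_RELA */:
      return std::make_pair(SymtabIdx, TargetIdx);        // symtab + relocated section
    case 2 /* SHT_SYMTAB */:
      return std::make_pair(StrtabIdx, FirstNonLocalSym); // strtab + first non-local
    case 17 /* SHT_GROUP */:
      return std::make_pair(SymtabIdx, SignatureSym);     // symtab + signature symbol
    default:
      return std::make_pair(0u, 0u);
    }
  }
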
-void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+static bool IsELFMetaDataSection(const MCSectionData &SD) {
+ return SD.getOrdinal() == ~UINT32_C(0) &&
+ !SD.getSection().isVirtualSection();
+}
+
+static uint64_t DataSectionSize(const MCSectionData &SD) {
+ uint64_t Ret = 0;
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ Ret += cast<MCDataFragment>(F).getContents().size();
+ }
+ return Ret;
+}
+
+static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionFileSize(&SD);
+}
+
+static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (IsELFMetaDataSection(SD))
+ return DataSectionSize(SD);
+ return Layout.getSectionAddressSize(&SD);
+}
+
+static void WriteDataSectionData(ELFObjectWriter *W, const MCSectionData &SD) {
+ for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
+ ++i) {
+ const MCFragment &F = *i;
+ assert(F.getKind() == MCFragment::FT_Data);
+ W->WriteBytes(cast<MCDataFragment>(F).getContents().str());
+ }
+}
+
+void ELFObjectWriter::WriteObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ GroupMapTy GroupMap;
+ RevGroupMapTy RevGroupMap;
+ CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap,
+ RevGroupMap);
+
+ SectionIndexMapTy SectionIndexMap;
+
+ ComputeIndexMap(Asm, SectionIndexMap);
+
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap);
+
CreateMetadataSections(const_cast<MCAssembler&>(Asm),
- const_cast<MCAsmLayout&>(Layout));
+ const_cast<MCAsmLayout&>(Layout),
+ SectionIndexMap);
+
+ // Update to include the metadata sections.
+ ComputeIndexMap(Asm, SectionIndexMap);
// Add 1 for the null section.
unsigned NumSections = Asm.size() + 1;
+ uint64_t NaturalAlignment = is64Bit() ? 8 : 4;
+ uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr);
+ uint64_t FileOff = HeaderSize;
+
+ std::vector<const MCSectionELF*> Sections;
+ Sections.resize(NumSections);
+
+ for (SectionIndexMapTy::const_iterator i=
+ SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) {
+ const std::pair<const MCSectionELF*, uint32_t> &p = *i;
+ Sections[p.second] = p.first;
+ }
- uint64_t SectionDataSize = 0;
+ for (unsigned i = 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
+ FileOff = RoundUpToAlignment(FileOff, SD.getAlignment());
// Get the size of the section in the output file (including padding).
- uint64_t Size = Layout.getSectionFileSize(&SD);
- SectionDataSize += Size;
+ FileOff += GetSectionFileSize(Layout, SD);
}
+ FileOff = RoundUpToAlignment(FileOff, NaturalAlignment);
+
// Write out the ELF header ...
- WriteHeader(SectionDataSize, NumSections);
- FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr);
+ WriteHeader(FileOff - HeaderSize, NumSections);
+
+ FileOff = HeaderSize;
// ... then all of the sections ...
DenseMap<const MCSection*, uint64_t> SectionOffsetMap;
- DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ for (unsigned i = 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
- unsigned Index = 1;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- // Remember the offset into the file for this section.
- SectionOffsetMap[&it->getSection()] = FileOff;
+ uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+ WriteZeros(Padding);
+ FileOff += Padding;
- SectionIndexMap[&it->getSection()] = Index++;
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&Section] = FileOff;
- const MCSectionData &SD = *it;
- FileOff += Layout.getSectionFileSize(&SD);
+ FileOff += GetSectionFileSize(Layout, SD);
- Asm.WriteSectionData(it, Layout, Writer);
+ if (IsELFMetaDataSection(SD))
+ WriteDataSectionData(this, SD);
+ else
+ Asm.WriteSectionData(&SD, Layout);
}
+ uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+ WriteZeros(Padding);
+ FileOff += Padding;
+
// ... and then the section header table.
// Should we align the section header table?
//
// Null section first.
- WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ uint64_t FirstSectionSize =
+ NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+ uint32_t FirstSectionLink =
+ ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+ WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+ for (unsigned i = 1; i < NumSections; ++i) {
+ const MCSectionELF &Section = *Sections[i];
+ const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+ uint32_t GroupSymbolIndex;
+ if (Section.getType() != ELF::SHT_GROUP)
+ GroupSymbolIndex = 0;
+ else
+ GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]);
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(SD.getSection());
+ uint64_t Size = GetSectionAddressSize(Layout, SD);
- uint64_t sh_link = 0;
- uint64_t sh_info = 0;
+ WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+ SectionOffsetMap[&Section], Size,
+ SD.getAlignment(), Section);
+ }
+}
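
  The null section header written above also serves as the overflow area for large
  section counts: once there are SHN_LORESERVE (0xff00) or more sections, e_shnum in
  the ELF header is set to zero and the real count is stored in the null entry's
  sh_size; likewise, a .shstrtab index that large moves into the null entry's sh_link.
  A small sketch of the count side of that encoding, following the gABI rule:

  #include <cstdint>

  // Values recorded in the ELF header vs. the null section header (section 0).
  struct SectionCountEncoding {
    uint16_t e_shnum;       // ELF header field
    uint64_t null_sh_size;  // sh_size of section header 0
  };

  static SectionCountEncoding encodeSectionCount(uint64_t NumSections) {
    SectionCountEncoding E;
    if (NumSections < 0xff00 /* SHN_LORESERVE */) {
      E.e_shnum = uint16_t(NumSections);
      E.null_sh_size = 0;
    } else {
      E.e_shnum = 0;                  // sentinel: real count escapes to section 0
      E.null_sh_size = NumSections;   // stored in the null section's sh_size
    }
    return E;
  }
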
- switch(Section.getType()) {
- case ELF::SHT_DYNAMIC:
- sh_link = SectionStringTableIndex[&it->getSection()];
- sh_info = 0;
- break;
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ switch (MOTW->getEMachine()) {
+ case ELF::EM_386:
+ case ELF::EM_X86_64:
+ return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_ARM:
+ return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ case ELF::EM_MBLAZE:
+ return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+ default: llvm_unreachable("Unsupported architecture"); break;
+ }
+}
+
+
+/// START OF SUBCLASSES for ELFObjectWriter
+//===- ARMELFObjectWriter -------------------------------------------===//
+
+ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+ARMELFObjectWriter::~ARMELFObjectWriter()
+{}
- case ELF::SHT_REL:
- case ELF::SHT_RELA: {
- const MCSection *SymtabSection;
- const MCSection *InfoSection;
-
- SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
- SectionKind::getReadOnly(),
- false);
- sh_link = SectionIndexMap[SymtabSection];
-
- // Remove ".rel" and ".rela" prefixes.
- unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
- StringRef SectionName = Section.getSectionName().substr(SecNameLen);
-
- InfoSection = Asm.getContext().getELFSection(SectionName,
- ELF::SHT_PROGBITS, 0,
- SectionKind::getReadOnly(),
- false);
- sh_info = SectionIndexMap[InfoSection];
+// FIXME: get the real EABI Version from the Triple.
+void ARMELFObjectWriter::WriteEFlags() {
+ Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
+}
+
+// On ARM, _MergedGlobals and most other symbols get emitted directly,
+// i.e. not as an offset from a section symbol.
+// This code is a first-cut approximation of what ARM/gcc does.
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ bool IsBSS) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+ bool EmitThisSym = false;
+
+ if (IsBSS) {
+ EmitThisSym = StringSwitch<bool>(Symbol.getName())
+ .Case("_MergedGlobals", true)
+ .Default(false);
+ } else {
+ EmitThisSym = StringSwitch<bool>(Symbol.getName())
+ .Case("_MergedGlobals", true)
+ .StartsWith(".L.str", true)
+ .Default(false);
+ }
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary())
+ return &Symbol;
+ return NULL;
+}
+
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: assert(0 && "Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_BASE_PREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ assert(0 && "unimplemented");
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
break;
}
-
- case ELF::SHT_SYMTAB:
- case ELF::SHT_DYNSYM:
- sh_link = StringTableIndex;
- sh_info = LastLocalSymbolIndex;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier"); break;
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ }
break;
-
- case ELF::SHT_PROGBITS:
- case ELF::SHT_STRTAB:
- case ELF::SHT_NOBITS:
- case ELF::SHT_NULL:
- // Nothing to do.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ assert(0 && "Unimplemented");
break;
-
- case ELF::SHT_HASH:
- case ELF::SHT_GROUP:
- case ELF::SHT_SYMTAB_SHNDX:
- default:
- assert(0 && "FIXME: sh_type value not supported!");
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
break;
}
-
- WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()],
- Section.getType(), Section.getFlags(),
- Layout.getSectionAddress(&SD),
- SectionOffsetMap.lookup(&SD.getSection()),
- Layout.getSectionSize(&SD), sh_link,
- sh_info, SD.getAlignment(),
- Section.getEntrySize());
}
-}
-ELFObjectWriter::ELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- bool HasRelocationAddend)
- : MCObjectWriter(OS, IsLittleEndian)
-{
- Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend);
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
}
-ELFObjectWriter::~ELFObjectWriter() {
- delete (ELFObjectWriterImpl*) Impl;
+//===- MBlazeELFObjectWriter -------------------------------------------===//
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
}
-void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
- ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
}
-void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend !=0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
}
-void ELFObjectWriter::WriteObject(const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+//===- X86ELFObjectWriter -------------------------------------------===//
+
+
+X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS,
+ bool IsLittleEndian)
+ : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+X86ELFObjectWriter::~X86ELFObjectWriter()
+{}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) {
+ // determine the type of the relocation
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (is64Bit()) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case FK_Data_4: // FIXME?
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ assert(isInt<32>(Target.getConstant()));
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
+
+ return Type;
}
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 670b2e9..cc1afbd 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include <cctype>
#include <cstring>
using namespace llvm;
@@ -23,11 +23,13 @@ MCAsmInfo::MCAsmInfo() {
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
+ LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
PCSymbol = "$";
SeparatorChar = ';';
CommentColumn = 40;
CommentString = "#";
+ LabelSuffix = ":";
GlobalPrefix = "";
PrivateGlobalPrefix = ".";
LinkerPrivateGlobalPrefix = "";
@@ -52,18 +54,19 @@ MCAsmInfo::MCAsmInfo() {
GPRel32Directive = 0;
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
+ HasAggressiveSymbolFolding = true;
HasLCOMMDirective = false;
COMMDirectiveAlignmentIsInBytes = true;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
HasNoDeadStrip = false;
+ HasSymbolResolver = false;
WeakRefDirective = 0;
WeakDefDirective = 0;
LinkOnceDirective = 0;
HiddenVisibilityAttr = MCSA_Hidden;
ProtectedVisibilityAttr = MCSA_Protected;
HasLEB128 = false;
- HasDotLocAndDotFile = false;
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
DwarfRequiresFrameSection = true;
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
index e0e261a..13776f0 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -37,13 +37,20 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
HasStaticCtorDtorReferenceInStaticMode = true;
-
+
+ // FIXME: Darwin 10 and newer don't need this.
+ LinkerRequiresNonEmptyDwarfLines = true;
+
+ // FIXME: Change this once MC is the system assembler.
+ HasAggressiveSymbolFolding = false;
+
HiddenVisibilityAttr = MCSA_PrivateExtern;
// Doesn't support protected visibility.
ProtectedVisibilityAttr = MCSA_Global;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
+ HasSymbolResolver = true;
DwarfUsesAbsoluteLabelForStmtList = false;
DwarfUsesLabelOffsetForRanges = false;
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 1cc8fb0..8d06982 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -23,6 +24,10 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <cctype>
using namespace llvm;
namespace {
@@ -32,29 +37,33 @@ class MCAsmStreamer : public MCStreamer {
const MCAsmInfo &MAI;
OwningPtr<MCInstPrinter> InstPrinter;
OwningPtr<MCCodeEmitter> Emitter;
-
+ OwningPtr<TargetAsmBackend> AsmBackend;
+
SmallString<128> CommentToEmit;
raw_svector_ostream CommentStream;
- unsigned IsLittleEndian : 1;
unsigned IsVerboseAsm : 1;
unsigned ShowInst : 1;
+ unsigned UseLoc : 1;
+
+ bool needsSet(const MCExpr *Value);
public:
MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
- bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer,
- MCCodeEmitter *emitter, bool showInst)
+ bool isVerboseAsm,
+ bool useLoc,
+ MCInstPrinter *printer, MCCodeEmitter *emitter,
+ TargetAsmBackend *asmbackend,
+ bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
- InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
- IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst) {
+ InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+ CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst), UseLoc(useLoc) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
~MCAsmStreamer() {}
- bool isLittleEndian() const { return IsLittleEndian; }
-
inline void EmitEOL() {
// If we don't have any comments, just emit a \n.
if (!IsVerboseAsm) {
@@ -68,7 +77,7 @@ public:
/// isVerboseAsm - Return true if this streamer supports verbose assembly at
/// all.
virtual bool isVerboseAsm() const { return IsVerboseAsm; }
-
+
/// hasRawTextSupport - We support EmitRawText.
virtual bool hasRawTextSupport() const { return true; }
@@ -98,13 +107,26 @@ public:
/// @name MCStreamer Interface
/// @{
- virtual void SwitchSection(const MCSection *Section);
+ virtual void ChangeSection(const MCSection *Section);
+
+ virtual void InitSections() {
+ // FIXME, this is MachO specific, but the testsuite
+ // expects this.
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
@@ -122,19 +144,26 @@ public:
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-
+
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment = 0);
-
+
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
- virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace);
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace);
+ virtual void EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace = 0);
+
+ virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+
+ virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+
virtual void EmitGPRel32Value(const MCExpr *Value);
-
+
virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
unsigned AddrSpace);
@@ -150,17 +179,28 @@ public:
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator);
+
+ virtual bool EmitCFIStartProc();
+ virtual bool EmitCFIEndProc();
+ virtual bool EmitCFIDefCfaOffset(int64_t Offset);
+ virtual bool EmitCFIDefCfaRegister(int64_t Register);
+ virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
+ virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
+ virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
virtual void EmitInstruction(const MCInst &Inst);
-
- /// EmitRawText - If this file is backed by a assembly streamer, this dumps
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
-
+
virtual void Finish();
-
+
/// @}
};
@@ -172,14 +212,14 @@ public:
/// verbose assembly output is enabled.
void MCAsmStreamer::AddComment(const Twine &T) {
if (!IsVerboseAsm) return;
-
+
// Make sure that CommentStream is flushed.
CommentStream.flush();
-
+
T.toVector(CommentToEmit);
// Each comment goes on its own line.
CommentToEmit.push_back('\n');
-
+
// Tell the comment stream that the vector changed underneath it.
CommentStream.resync();
}
@@ -189,10 +229,10 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
OS << '\n';
return;
}
-
+
CommentStream.flush();
StringRef Comments = CommentToEmit.str();
-
+
assert(Comments.back() == '\n' &&
"Comment array not newline terminated");
do {
@@ -200,10 +240,10 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
OS.PadToColumn(MAI.getCommentColumn());
size_t Position = Comments.find('\n');
OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
-
+
Comments = Comments.substr(Position+1);
} while (!Comments.empty());
-
+
CommentToEmit.clear();
// Tell the comment stream that the vector changed underneath it.
CommentStream.resync();
@@ -214,33 +254,41 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
}
-void MCAsmStreamer::SwitchSection(const MCSection *Section) {
+void MCAsmStreamer::ChangeSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
- if (Section != CurSection) {
- PrevSection = CurSection;
- CurSection = Section;
- Section->PrintSwitchToSection(MAI, OS);
- }
+ Section->PrintSwitchToSection(MAI, OS);
}
void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(CurSection && "Cannot emit before setting section!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
- OS << *Symbol << ":";
+ OS << *Symbol << MAI.getLabelSuffix();
EmitEOL();
- Symbol->setSection(*CurSection);
+ Symbol->setSection(*getCurrentSection());
}
void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
default: assert(0 && "Invalid flag!");
+ case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
+ case MCAF_Code16: OS << "\t.code\t16"; break;
+ case MCAF_Code32: OS << "\t.code\t32"; break;
}
EmitEOL();
}
+void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // This needs to emit to a temporary string to get properly quoted
+ // MCSymbols when they have spaces in them.
+ OS << "\t.thumb_func";
+ if (Func)
+ OS << '\t' << *Func;
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
OS << *Symbol << " = " << *Value;
EmitEOL();
@@ -249,6 +297,18 @@ void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
Symbol->setVariableValue(Value);
}
+void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ EmitDwarfSetLineAddr(LineDelta, Label,
+ getContext().getTargetAsmInfo().getPointerSize());
+}
+
void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
switch (Attribute) {
@@ -259,6 +319,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object
case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common
case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype
+ case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
OS << "\t.type\t" << *Symbol << ','
<< ((MAI.getCommentString()[0] != '@') ? '@' : '%');
@@ -270,6 +331,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeTLS: OS << "tls_object"; break;
case MCSA_ELF_TypeCommon: OS << "common"; break;
case MCSA_ELF_TypeNoType: OS << "no_type"; break;
+ case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
}
EmitEOL();
return;
@@ -282,6 +344,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break;
case MCSA_Local: OS << "\t.local\t"; break;
case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break;
+ case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break;
case MCSA_Protected: OS << "\t.protected\t"; break;
case MCSA_Reference: OS << "\t.reference\t"; break;
@@ -352,11 +415,11 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {
// Note: a .zerofill directive does not switch sections.
OS << ".zerofill ";
-
+
// This is a mach-o specific directive.
const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
-
+
if (Symbol != NULL) {
OS << ',' << *Symbol << ',' << Size;
if (ByteAlignment != 0)
@@ -374,11 +437,11 @@ void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
// Instead of using the Section we'll just use the shortcut.
// This is a mach-o specific directive and section.
OS << ".tbss " << *Symbol << ", " << Size;
-
+
// Output align if we have it. We default to 1 so don't bother printing
// that.
if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
-
+
EmitEOL();
}
@@ -386,19 +449,19 @@ static inline char toOctal(int X) { return (X&7)+'0'; }
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
OS << '"';
-
+
for (unsigned i = 0, e = Data.size(); i != e; ++i) {
unsigned char C = Data[i];
if (C == '"' || C == '\\') {
OS << '\\' << (char)C;
continue;
}
-
+
if (isprint((unsigned char)C)) {
OS << (char)C;
continue;
}
-
+
switch (C) {
case '\b': OS << "\\b"; break;
case '\f': OS << "\\f"; break;
@@ -413,15 +476,15 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
break;
}
}
-
+
OS << '"';
}
void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
- assert(CurSection && "Cannot emit contents before setting section!");
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
if (Data.empty()) return;
-
+
if (Data.size() == 1) {
OS << MAI.getData8bitsDirective(AddrSpace);
OS << (unsigned)(unsigned char)Data[0];
@@ -443,11 +506,15 @@ void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
EmitEOL();
}
-/// EmitIntValue - Special case of EmitValue that avoids the client having
-/// to pass in a MCExpr for constant integers.
void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
unsigned AddrSpace) {
- assert(CurSection && "Cannot emit contents before setting section!");
+ EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+}
+
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ assert(!isPCRel && "Cannot emit pc relative relocations!");
const char *Directive = 0;
switch (Size) {
default: break;
@@ -458,35 +525,43 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
Directive = MAI.getData64bitsDirective(AddrSpace);
// If the target doesn't support 64-bit data, emit as two 32-bit halves.
if (Directive) break;
- if (isLittleEndian()) {
- EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace);
- EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace);
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getTargetAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
} else {
- EmitIntValue((uint32_t)(Value >> 32), 4, AddrSpace);
- EmitIntValue((uint32_t)(Value >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
}
return;
}
-
+
assert(Directive && "Invalid size for machine code value!");
- OS << Directive << truncateToSize(Value, Size);
+ OS << Directive << *Value;
EmitEOL();
}
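
  With no 64-bit data directive available, the value is emitted as two 32-bit halves in
  memory order: on a little-endian target 0x1122334455667788 comes out as 0x55667788
  followed by 0x11223344, and in the opposite order on a big-endian target. A tiny
  sketch of that split:

  #include <cstdint>
  #include <utility>

  // Returns the two 32-bit halves in the order they should be emitted.
  static std::pair<uint32_t, uint32_t>
  splitFor32BitDirectives(uint64_t V, bool LittleEndian) {
    uint32_t Lo = uint32_t(V);        // low 32 bits
    uint32_t Hi = uint32_t(V >> 32);  // high 32 bits
    return LittleEndian ? std::make_pair(Lo, Hi) : std::make_pair(Hi, Lo);
  }
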
-void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
- assert(CurSection && "Cannot emit contents before setting section!");
- const char *Directive = 0;
- switch (Size) {
- default: break;
- case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
- case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
- case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
- case 8: Directive = MAI.getData64bitsDirective(AddrSpace); break;
+void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitULEB128IntValue(IntValue, AddrSpace);
+ return;
}
-
- assert(Directive && "Invalid size for machine code value!");
- OS << Directive << *Value;
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue)) {
+ EmitSLEB128IntValue(IntValue, AddrSpace);
+ return;
+ }
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
EmitEOL();
}
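
  When the operand folds to a constant, the streamer emits the LEB128 bytes itself (via
  EmitULEB128IntValue/EmitSLEB128IntValue); otherwise it prints the .uleb128/.sleb128
  directive as above. For reference, a standalone sketch of unsigned LEB128 encoding,
  which is the byte format those directives produce:

  #include <cstdint>
  #include <vector>

  // Unsigned LEB128: 7 value bits per byte, low bits first, high bit marks "more".
  static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
    std::vector<uint8_t> Bytes;
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80;                 // more bytes follow
      Bytes.push_back(Byte);
    } while (Value != 0);
    return Bytes;
  }
  // encodeULEB128(300) == { 0xAC, 0x02 }
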
@@ -502,7 +577,7 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
unsigned AddrSpace) {
if (NumBytes == 0) return;
-
+
if (AddrSpace == 0)
if (const char *ZeroDirective = MAI.getZeroDirective()) {
OS << ZeroDirective << NumBytes;
@@ -530,7 +605,7 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
case 4: OS << ".p2alignl "; break;
case 8: llvm_unreachable("Unsupported alignment size!");
}
-
+
if (MAI.getAlignmentIsInBytes())
OS << ByteAlignment;
else
@@ -540,13 +615,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
OS << ", 0x";
OS.write_hex(truncateToSize(Value, ValueSize));
- if (MaxBytesToEmit)
+ if (MaxBytesToEmit)
OS << ", " << MaxBytesToEmit;
}
EmitEOL();
return;
}
-
+
// Non-power of two alignment. This is not widely supported by assemblers.
// FIXME: Parameterize this based on MAI.
switch (ValueSize) {
@@ -559,7 +634,7 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
OS << ' ' << ByteAlignment;
OS << ", " << truncateToSize(Value, ValueSize);
- if (MaxBytesToEmit)
+ if (MaxBytesToEmit)
OS << ", " << MaxBytesToEmit;
EmitEOL();
}
@@ -586,10 +661,118 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
EmitEOL();
}
-void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
- OS << "\t.file\t" << FileNo << ' ';
- PrintQuotedString(Filename, OS);
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+ if (UseLoc) {
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ }
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator) {
+ this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator);
+ if (!UseLoc)
+ return;
+
+ OS << "\t.loc\t" << FileNo << " " << Line << " " << Column;
+ if (Flags & DWARF2_FLAG_BASIC_BLOCK)
+ OS << " basic_block";
+ if (Flags & DWARF2_FLAG_PROLOGUE_END)
+ OS << " prologue_end";
+ if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN)
+ OS << " epilogue_begin";
+
+ unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags();
+ if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) {
+ OS << " is_stmt ";
+
+ if (Flags & DWARF2_FLAG_IS_STMT)
+ OS << "1";
+ else
+ OS << "0";
+ }
+
+ if (Isa)
+ OS << "isa " << Isa;
+ if (Discriminator)
+ OS << "discriminator " << Discriminator;
+ EmitEOL();
+}
+
+bool MCAsmStreamer::EmitCFIStartProc() {
+ if (this->MCStreamer::EmitCFIStartProc())
+ return true;
+
+ OS << "\t.cfi_startproc";
EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFIEndProc() {
+ if (this->MCStreamer::EmitCFIEndProc())
+ return true;
+
+ OS << "\t.cfi_endproc";
+ EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ if (this->MCStreamer::EmitCFIDefCfaOffset(Offset))
+ return true;
+
+ OS << "\t.cfi_def_cfa_offset " << Offset;
+ EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ if (this->MCStreamer::EmitCFIDefCfaRegister(Register))
+ return true;
+
+ OS << "\t.cfi_def_cfa_register " << Register;
+ EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ if (this->MCStreamer::EmitCFIOffset(Register, Offset))
+ return true;
+
+ OS << "\t.cfi_offset " << Register << ", " << Offset;
+ EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding))
+ return true;
+
+ OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
+ EmitEOL();
+
+ return false;
+}
+
+bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ if (this->MCStreamer::EmitCFILsda(Sym, Encoding))
+ return true;
+
+ OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
+ EmitEOL();
+
+ return false;
}
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
@@ -610,7 +793,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
- const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
for (unsigned j = 0; j != Info.TargetSize; ++j) {
unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j;
assert(Index < Code.size() * 8 && "Invalid offset in fixup!");
@@ -618,6 +801,8 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
}
}
+  // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
+  // high order halfword of a 32-bit Thumb2 instruction is emitted first.
OS << "encoding: [";
for (unsigned i = 0, e = Code.size(); i != e; ++i) {
if (i)
@@ -637,15 +822,26 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
if (MapEntry == 0) {
OS << format("0x%02x", uint8_t(Code[i]));
} else {
- assert(Code[i] == 0 && "Encoder wrote into fixed up bit!");
- OS << char('A' + MapEntry - 1);
+ if (Code[i]) {
+ // FIXME: Some of the 8 bits require fix up.
+ OS << format("0x%02x", uint8_t(Code[i])) << '\''
+ << char('A' + MapEntry - 1) << '\'';
+ } else
+ OS << char('A' + MapEntry - 1);
}
} else {
// Otherwise, write out in binary.
OS << "0b";
for (unsigned j = 8; j--;) {
unsigned Bit = (Code[i] >> j) & 1;
- if (uint8_t MapEntry = FixupMap[i * 8 + j]) {
+
+ unsigned FixupBit;
+ if (getContext().getTargetAsmInfo().isLittleEndian())
+ FixupBit = i * 8 + j;
+ else
+ FixupBit = i * 8 + (7-j);
+
+ if (uint8_t MapEntry = FixupMap[FixupBit]) {
assert(Bit == 0 && "Encoder wrote into fixed up bit!");
OS << char('A' + MapEntry - 1);
} else
@@ -657,14 +853,17 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
- const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind());
+ const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind());
OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset()
<< ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n";
}
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
- assert(CurSection && "Cannot emit contents before setting section!");
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ if (!UseLoc)
+ MCLineEntry::Make(this, getCurrentSection());
// Show the encoding in a comment if we have a code emitter.
if (Emitter)
@@ -684,7 +883,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
EmitEOL();
}
-/// EmitRawText - If this file is backed by a assembly streamer, this dumps
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
void MCAsmStreamer::EmitRawText(StringRef String) {
@@ -695,13 +894,16 @@ void MCAsmStreamer::EmitRawText(StringRef String) {
}
void MCAsmStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles() && !UseLoc)
+ MCDwarfFileTable::Emit(this);
}
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
- bool isLittleEndian,
- bool isVerboseAsm, MCInstPrinter *IP,
- MCCodeEmitter *CE, bool ShowInst) {
- return new MCAsmStreamer(Context, OS, isLittleEndian, isVerboseAsm,
- IP, CE, ShowInst);
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *IP, MCCodeEmitter *CE,
+ TargetAsmBackend *TAB, bool ShowInst) {
+ return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, TAB, ShowInst);
}
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index f0e1d7f..9992646 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -11,10 +11,13 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -36,7 +39,6 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-STATISTIC(SectionLayouts, "Number of section layouts");
}
}
@@ -48,131 +50,78 @@ STATISTIC(SectionLayouts, "Number of section layouts");
/* *** */
MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
- : Assembler(Asm), LastValidFragment(0)
+ : Assembler(Asm), LastValidFragment()
{
// Compute the section layout order. Virtual sections must go last.
for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (!Asm.getBackend().isVirtualSection(it->getSection()))
+ if (!it->getSection().isVirtualSection())
SectionOrder.push_back(&*it);
for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
- if (Asm.getBackend().isVirtualSection(it->getSection()))
+ if (it->getSection().isVirtualSection())
SectionOrder.push_back(&*it);
}
-bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const {
- // The first section is always up-to-date.
- unsigned Index = SD->getLayoutOrder();
- if (!Index)
- return true;
-
- // Otherwise, sections are always implicitly computed when the preceeding
- // fragment is layed out.
- const MCSectionData *Prev = getSectionOrder()[Index - 1];
- return isFragmentUpToDate(&(Prev->getFragmentList().back()));
-}
-
bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
- return (LastValidFragment &&
- F->getLayoutOrder() <= LastValidFragment->getLayoutOrder());
+ const MCSectionData &SD = *F->getParent();
+ const MCFragment *LastValid = LastValidFragment.lookup(&SD);
+ if (!LastValid)
+ return false;
+ assert(LastValid->getParent() == F->getParent());
+ return F->getLayoutOrder() <= LastValid->getLayoutOrder();
}
-void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
+void MCAsmLayout::Invalidate(MCFragment *F) {
// If this fragment wasn't already up-to-date, we don't need to do anything.
if (!isFragmentUpToDate(F))
return;
- // Otherwise, reset the last valid fragment to the predecessor of the
- // invalidated fragment.
- LastValidFragment = F->getPrevNode();
- if (!LastValidFragment) {
- unsigned Index = F->getParent()->getLayoutOrder();
- if (Index != 0) {
- MCSectionData *Prev = getSectionOrder()[Index - 1];
- LastValidFragment = &(Prev->getFragmentList().back());
- }
- }
+ // Otherwise, reset the last valid fragment to this fragment.
+ const MCSectionData &SD = *F->getParent();
+ LastValidFragment[&SD] = F;
}
void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+ MCSectionData &SD = *F->getParent();
+
+ MCFragment *Cur = LastValidFragment[&SD];
+ if (!Cur)
+ Cur = &*SD.begin();
+ else
+ Cur = Cur->getNextNode();
+
// Advance the layout position until the fragment is up-to-date.
while (!isFragmentUpToDate(F)) {
- // Advance to the next fragment.
- MCFragment *Cur = LastValidFragment;
- if (Cur)
- Cur = Cur->getNextNode();
- if (!Cur) {
- unsigned NextIndex = 0;
- if (LastValidFragment)
- NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1;
- Cur = SectionOrder[NextIndex]->begin();
- }
-
const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
+ Cur = Cur->getNextNode();
}
}
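// [Editor's sketch, not part of the patch] A minimal, self-contained model of
// the lazy per-section layout used above: each section remembers the last
// fragment whose offset is valid, Invalidate() moves that marker back, and
// offsets are recomputed on demand by walking forward from it. All names here
// (Frag, Section, LazyLayout) are hypothetical and only illustrate the idea.
#include <cstdint>
#include <map>
#include <vector>

struct Frag { uint64_t Size = 0; uint64_t Offset = 0; };
struct Section { std::vector<Frag> Frags; };

class LazyLayout {
  std::map<const Section *, int> LastValid; // index of last laid-out fragment
public:
  // Called when fragment Index changed size: its own offset stays valid, but
  // every later fragment in the section must be recomputed.
  void invalidate(const Section &S, int Index) {
    std::map<const Section *, int>::iterator It = LastValid.find(&S);
    if (It != LastValid.end() && It->second > Index)
      It->second = Index;
  }
  // Lay out lazily: walk forward from the last valid fragment as needed.
  uint64_t offsetOf(Section &S, int Index) {
    int Cur = LastValid.count(&S) ? LastValid[&S] + 1 : 0;
    for (; Cur <= Index; ++Cur) {
      S.Frags[Cur].Offset =
          Cur ? S.Frags[Cur - 1].Offset + S.Frags[Cur - 1].Size : 0;
      LastValid[&S] = Cur;
    }
    return S.Frags[Index].Offset;
  }
};

int main() {
  Section S;
  S.Frags.resize(3);
  S.Frags[0].Size = 4; S.Frags[1].Size = 2; S.Frags[2].Size = 8;
  LazyLayout L;
  uint64_t Off2 = L.offsetOf(S, 2);    // 4 + 2 = 6
  S.Frags[1].Size = 6;                 // fragment 1 grew (e.g. relaxation)
  L.invalidate(S, 1);                  // offsets after fragment 1 are stale
  uint64_t NewOff2 = L.offsetOf(S, 2); // recomputed: 4 + 6 = 10
  return (Off2 == 6 && NewOff2 == 10) ? 0 : 1;
}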
-void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) {
- if (LastValidFragment == Src)
- LastValidFragment = Dst;
-
- Dst->Offset = Src->Offset;
- Dst->EffectiveSize = Src->EffectiveSize;
-}
-
-uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const {
- assert(F->getParent() && "Missing section()!");
- return getSectionAddress(F->getParent()) + getFragmentOffset(F);
-}
-
-uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const {
- EnsureValid(F);
- assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!");
- return F->EffectiveSize;
-}
-
uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
EnsureValid(F);
assert(F->Offset != ~UINT64_C(0) && "Address not set!");
return F->Offset;
}
-uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const {
- assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!");
- return getFragmentAddress(SD->getFragment()) + SD->getOffset();
-}
-
-uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const {
- EnsureValid(SD->begin());
- assert(SD->Address != ~UINT64_C(0) && "Address not set!");
- return SD->Address;
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+ assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
+ return getFragmentOffset(SD->getFragment()) + SD->getOffset();
}
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
// The size is the last fragment's end offset.
const MCFragment &F = SD->getFragmentList().back();
- return getFragmentOffset(&F) + getFragmentEffectiveSize(&F);
+ return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
// Virtual sections have no file size.
- if (getAssembler().getBackend().isVirtualSection(SD->getSection()))
+ if (SD->getSection().isVirtualSection())
return 0;
// Otherwise, the file size is the same as the address space size.
return getSectionAddressSize(SD);
}
-uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
- // The logical size is the address space size minus any tail padding.
- uint64_t Size = getSectionAddressSize(SD);
- const MCAlignFragment *AF =
- dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back()));
- if (AF && AF->hasOnlyAlignAddress())
- Size -= getFragmentEffectiveSize(AF);
-
- return Size;
-}
-
/* *** */
MCFragment::MCFragment() : Kind(FragmentType(~0)) {
@@ -182,8 +131,7 @@ MCFragment::~MCFragment() {
}
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)),
- EffectiveSize(~UINT64_C(0))
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
@@ -195,8 +143,8 @@ MCSectionData::MCSectionData() : Section(0) {}
MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
: Section(&_Section),
+ Ordinal(~UINT32_C(0)),
Alignment(1),
- Address(~UINT64_C(0)),
HasInstructions(false)
{
if (A)
@@ -220,99 +168,17 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
/* *** */
-MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
- MCCodeEmitter &_Emitter, raw_ostream &_OS)
- : Context(_Context), Backend(_Backend), Emitter(_Emitter),
- OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false)
+MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+ MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+ raw_ostream &OS_)
+ : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_),
+ OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false)
{
}
MCAssembler::~MCAssembler() {
}
-static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue Target,
- const MCSection *BaseSection) {
- // The effective fixup address is
- // addr(atom(A)) + offset(A)
- // - addr(atom(B)) - offset(B)
- // - addr(<base symbol>) + <fixup offset from base symbol>
- // and the offsets are not relocatable, so the fixup is fully resolved when
- // addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0.
- //
- // The simple (Darwin, except on x86_64) way of dealing with this was to
- // assume that any reference to a temporary symbol *must* be a temporary
- // symbol in the same atom, unless the sections differ. Therefore, any PCrel
- // relocation to a temporary symbol (in the same section) is fully
- // resolved. This also works in conjunction with absolutized .set, which
- // requires the compiler to use .set to absolutize the differences between
- // symbols which the compiler knows to be assembly time constants, so we don't
- // need to worry about considering symbol differences fully resolved.
-
- // Non-relative fixups are only resolved if constant.
- if (!BaseSection)
- return Target.isAbsolute();
-
- // Otherwise, relative fixups are only resolved if not a difference and the
- // target is a temporary in the same section.
- if (Target.isAbsolute() || Target.getSymB())
- return false;
-
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->isTemporary() || !A->isInSection() ||
- &A->getSection() != BaseSection)
- return false;
-
- return true;
-}
-
-static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCValue Target,
- const MCSymbolData *BaseSymbol) {
- // The effective fixup address is
- // addr(atom(A)) + offset(A)
- // - addr(atom(B)) - offset(B)
- // - addr(BaseSymbol) + <fixup offset from base symbol>
- // and the offsets are not relocatable, so the fixup is fully resolved when
- // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0.
- //
- // Note that "false" is almost always conservatively correct (it means we emit
- // a relocation which is unnecessary), except when it would force us to emit a
- // relocation which the target cannot encode.
-
- const MCSymbolData *A_Base = 0, *B_Base = 0;
- if (const MCSymbolRefExpr *A = Target.getSymA()) {
- // Modified symbol references cannot be resolved.
- if (A->getKind() != MCSymbolRefExpr::VK_None)
- return false;
-
- A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol()));
- if (!A_Base)
- return false;
- }
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- // Modified symbol references cannot be resolved.
- if (B->getKind() != MCSymbolRefExpr::VK_None)
- return false;
-
- B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol()));
- if (!B_Base)
- return false;
- }
-
- // If there is no base, A and B have to be the same atom for this fixup to be
- // fully resolved.
- if (!BaseSymbol)
- return A_Base == B_Base;
-
- // Otherwise, B must be missing and A must be the base.
- return !B_Base && BaseSymbol == A_Base;
-}
-
bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())
@@ -326,8 +192,7 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
return getBackend().doesSectionRequireSymbols(Symbol.getSection());
}
-const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
- const MCSymbolData *SD) const {
+const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
// Linker visible symbols define atoms.
if (isSymbolLinkerVisible(SD->getSymbol()))
return SD;
@@ -351,67 +216,78 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
MCValue &Target, uint64_t &Value) const {
++stats::EvaluateFixup;
- if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout))
+ if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
report_fatal_error("expected relocatable expression");
- // FIXME: How do non-scattered symbols work in ELF? I presume the linker
- // doesn't support small relocations, but then under what criteria does the
- // assembler allow symbol differences?
+ bool IsPCRel = Backend.getFixupKindInfo(
+ Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
+
+ bool IsResolved;
+ if (IsPCRel) {
+ if (Target.getSymB()) {
+ IsResolved = false;
+ } else if (!Target.getSymA()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol &SA = A->getSymbol();
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ SA.AliasedSymbol().isUndefined()) {
+ IsResolved = false;
+ } else {
+ const MCSymbolData &DataA = getSymbolData(SA);
+ IsResolved =
+ getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA,
+ *DF, false, true);
+ }
+ }
+ } else {
+ IsResolved = Target.isAbsolute();
+ }
Value = Target.getConstant();
- bool IsPCRel = Emitter.getFixupKindInfo(
- Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
- bool IsResolved = true;
+ bool IsThumb = false;
if (const MCSymbolRefExpr *A = Target.getSymA()) {
- if (A->getSymbol().isDefined())
- Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol()));
- else
- IsResolved = false;
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value += Layout.getSymbolOffset(&getSymbolData(Sym));
+ if (isThumbFunc(&Sym))
+ IsThumb = true;
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- if (B->getSymbol().isDefined())
- Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol()));
- else
- IsResolved = false;
+ const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
+ if (Sym.isDefined())
+ Value -= Layout.getSymbolOffset(&getSymbolData(Sym));
}
- // If we are using scattered symbols, determine whether this value is actually
- // resolved; scattering may cause atoms to move.
- if (IsResolved && getBackend().hasScatteredSymbols()) {
- if (getBackend().hasReliableSymbolDifference()) {
- // If this is a PCrel relocation, find the base atom (identified by its
- // symbol) that the fixup value is relative to.
- const MCSymbolData *BaseSymbol = 0;
- if (IsPCRel) {
- BaseSymbol = DF->getAtom();
- if (!BaseSymbol)
- IsResolved = false;
- }
- if (IsResolved)
- IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target,
- BaseSymbol);
- } else {
- const MCSection *BaseSection = 0;
- if (IsPCRel)
- BaseSection = &DF->getParent()->getSection();
+ bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+ assert((ShouldAlignPC ? IsPCRel : true) &&
+ "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
- IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target,
- BaseSection);
- }
+ if (IsPCRel) {
+ uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+
+ // A number of ARM fixups in Thumb mode require that the effective PC
+ // address be determined as the 32-bit aligned version of the actual offset.
+ if (ShouldAlignPC) Offset &= ~0x3;
+ Value -= Offset;
}
- if (IsPCRel)
- Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset();
+ // ARM fixups based on a Thumb function address need to have the low
+ // bit set. The actual value is always at least 16-bit aligned, so the
+ // low bit is normally clear and available for use as an ISA flag for
+ // interworking.
+ if (IsThumb)
+ Value |= 1;
return IsResolved;
}
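// [Editor's sketch, not part of the patch] The fixup arithmetic above,
// stripped of the MC types: the value is the target offset plus the addend,
// and for a PC-relative fixup the location of the fixup itself is subtracted,
// with an optional align-down-to-32-bits of that location (used by some ARM
// fixups) and the Thumb low bit ORed in at the end. Everything here is
// hypothetical illustration, not the MCAssembler API.
#include <cstdint>
#include <cstdio>

static uint64_t fixupValue(uint64_t TargetOffset, int64_t Addend,
                           uint64_t FixupLocation, bool IsPCRel,
                           bool AlignPCDownTo32Bits, bool TargetIsThumbFunc) {
  uint64_t Value = TargetOffset + Addend;
  if (IsPCRel) {
    uint64_t PC = FixupLocation;
    if (AlignPCDownTo32Bits)
      PC &= ~UINT64_C(0x3);     // effective PC is the 32-bit aligned address
    Value -= PC;
  }
  if (TargetIsThumbFunc)
    Value |= 1;                 // low bit flags Thumb for interworking
  return Value;
}

int main() {
  // Branch at offset 0x1006 to a Thumb function at offset 0x2000, no addend:
  std::printf("%#llx\n",
              (unsigned long long)fixupValue(0x2000, 0, 0x1006, true, true, true));
  // Prints 0xffd (0x2000 - 0x1004, with the Thumb bit set).
}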
-uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
- const MCFragment &F,
- uint64_t SectionAddress,
- uint64_t FragmentOffset) const {
+uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
return cast<MCDataFragment>(F).getContents().size();
@@ -420,62 +296,48 @@ uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
case MCFragment::FT_Inst:
return cast<MCInstFragment>(F).getInstSize();
+ case MCFragment::FT_LEB:
+ return cast<MCLEBFragment>(F).getContents().size();
+
case MCFragment::FT_Align: {
const MCAlignFragment &AF = cast<MCAlignFragment>(F);
-
- assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) &&
- "Invalid OnlyAlignAddress bit, not the last fragment!");
-
- uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset,
- AF.getAlignment());
-
- // Honor MaxBytesToEmit.
+ unsigned Offset = Layout.getFragmentOffset(&AF);
+ unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
if (Size > AF.getMaxBytesToEmit())
return 0;
-
return Size;
}
case MCFragment::FT_Org: {
- const MCOrgFragment &OF = cast<MCOrgFragment>(F);
-
- // FIXME: We should compute this sooner, we don't want to recurse here, and
- // we would like to be more functional.
+ MCOrgFragment &OF = cast<MCOrgFragment>(F);
int64_t TargetLocation;
- if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
+ if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout))
report_fatal_error("expected assembly-time absolute expression");
// FIXME: We need a way to communicate this error.
- int64_t Offset = TargetLocation - FragmentOffset;
- if (Offset < 0)
+ uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+ int64_t Size = TargetLocation - FragmentOffset;
+ if (Size < 0 || Size >= 0x40000000)
report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
- "' (at offset '" + Twine(FragmentOffset) + "'");
-
- return Offset;
+ "' (at offset '" + Twine(FragmentOffset) + "')");
+ return Size;
}
+
+ case MCFragment::FT_Dwarf:
+ return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+ case MCFragment::FT_DwarfFrame:
+ return cast<MCDwarfCallFrameFragment>(F).getContents().size();
}
assert(0 && "invalid fragment kind");
return 0;
}
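// [Editor's sketch, not part of the patch] FT_Align sizing in isolation: the
// fragment's size is however many bytes are needed to round its own offset up
// to the requested alignment, and it collapses to zero when that padding would
// exceed MaxBytesToEmit. Self-contained model; offsetToAlignment here mirrors
// what llvm::OffsetToAlignment computes.
#include <cassert>
#include <cstdint>

static uint64_t offsetToAlignment(uint64_t Offset, uint64_t Align) {
  return (Align - (Offset % Align)) % Align;
}

static uint64_t alignFragmentSize(uint64_t FragmentOffset, uint64_t Align,
                                  uint64_t MaxBytesToEmit) {
  uint64_t Size = offsetToAlignment(FragmentOffset, Align);
  return Size > MaxBytesToEmit ? 0 : Size;
}

int main() {
  assert(alignFragmentSize(13, 16, 16) == 3);  // pad 13 up to 16
  assert(alignFragmentSize(16, 16, 16) == 0);  // already aligned
  assert(alignFragmentSize(1, 16, 8) == 0);    // 15 bytes needed, cap is 8
}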
-void MCAsmLayout::LayoutFile() {
- // Initialize the first section and set the valid fragment layout point. All
- // actual layout computations are done lazily.
- LastValidFragment = 0;
- if (!getSectionOrder().empty())
- getSectionOrder().front()->Address = 0;
-}
-
void MCAsmLayout::LayoutFragment(MCFragment *F) {
MCFragment *Prev = F->getPrevNode();
// We should never try to recompute something which is up-to-date.
assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
- // We should never try to compute the fragment layout if the section isn't
- // up-to-date.
- assert(isSectionUpToDate(F->getParent()) &&
- "Attempt to compute fragment before it's section!");
// We should never try to compute the fragment layout if its predecessor
// isn't up-to-date.
assert((!Prev || isFragmentUpToDate(Prev)) &&
@@ -483,55 +345,26 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
++stats::FragmentLayouts;
- // Compute the fragment start address.
- uint64_t StartAddress = F->getParent()->Address;
- uint64_t Address = StartAddress;
- if (Prev)
- Address += Prev->Offset + Prev->EffectiveSize;
-
// Compute fragment offset and size.
- F->Offset = Address - StartAddress;
- F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress,
- F->Offset);
- LastValidFragment = F;
-
- // If this is the last fragment in a section, update the next section address.
- if (!F->getNextNode()) {
- unsigned NextIndex = F->getParent()->getLayoutOrder() + 1;
- if (NextIndex != getSectionOrder().size())
- LayoutSection(getSectionOrder()[NextIndex]);
- }
-}
-
-void MCAsmLayout::LayoutSection(MCSectionData *SD) {
- unsigned SectionOrderIndex = SD->getLayoutOrder();
-
- ++stats::SectionLayouts;
-
- // Compute the section start address.
- uint64_t StartAddress = 0;
- if (SectionOrderIndex) {
- MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1];
- StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev);
- }
-
- // Honor the section alignment requirements.
- StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ uint64_t Offset = 0;
+ if (Prev)
+ Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
- // Set the section address.
- SD->Address = StartAddress;
+ F->Offset = Offset;
+ LastValidFragment[F->getParent()] = F;
}
/// WriteFragmentData - Write the \arg F data to the output file.
static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment &F, MCObjectWriter *OW) {
+ const MCFragment &F) {
+ MCObjectWriter *OW = &Asm.getWriter();
uint64_t Start = OW->getStream().tell();
(void) Start;
++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F);
+ uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -598,9 +431,17 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
- case MCFragment::FT_Inst:
- llvm_unreachable("unexpected inst fragment after lowering");
+ case MCFragment::FT_Inst: {
+ MCInstFragment &IF = cast<MCInstFragment>(F);
+ OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
+ break;
+ }
+
+ case MCFragment::FT_LEB: {
+ MCLEBFragment &LF = cast<MCLEBFragment>(F);
+ OW->WriteBytes(LF.getContents().str());
break;
+ }
case MCFragment::FT_Org: {
MCOrgFragment &OF = cast<MCOrgFragment>(F);
@@ -610,16 +451,26 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
+
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+ OW->WriteBytes(OF.getContents().str());
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+ OW->WriteBytes(CF.getContents().str());
+ break;
+ }
}
assert(OW->getStream().tell() - Start == FragmentSize);
}
void MCAssembler::WriteSectionData(const MCSectionData *SD,
- const MCAsmLayout &Layout,
- MCObjectWriter *OW) const {
+ const MCAsmLayout &Layout) const {
// Ignore virtual sections.
- if (getBackend().isVirtualSection(SD->getSection())) {
+ if (SD->getSection().isVirtualSection()) {
assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
// Check that contents are only things legal inside a virtual section.
@@ -657,51 +508,34 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
return;
}
- uint64_t Start = OW->getStream().tell();
+ uint64_t Start = getWriter().getStream().tell();
(void) Start;
for (MCSectionData::const_iterator it = SD->begin(),
ie = SD->end(); it != ie; ++it)
- WriteFragmentData(*this, Layout, *it, OW);
+ WriteFragmentData(*this, Layout, *it);
- assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
+ assert(getWriter().getStream().tell() - Start ==
+ Layout.getSectionAddressSize(SD));
}
-void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) {
- // Create dummy fragments and assign section ordinals.
- unsigned SectionIndex = 0;
- for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it)
- SectionIndex++;
-
- SD.setOrdinal(SectionIndex);
-
- // Assign layout order indices to sections and fragments.
- unsigned FragmentIndex = 0;
- unsigned i = 0;
- for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) {
- MCSectionData *SD = Layout.getSectionOrder()[i];
- for (MCSectionData::iterator it2 = SD->begin(),
- ie2 = SD->end(); it2 != ie2; ++it2)
- FragmentIndex++;
- }
+uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+ MCFragment &F,
+ const MCFixup &Fixup) {
+ // Evaluate the fixup.
+ MCValue Target;
+ uint64_t FixedValue;
+ if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ // The fixup was unresolved; we need a relocation. Inform the object
+ // writer of the relocation, and give it an opportunity to adjust the
+ // fixup value if need be.
+ getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+ }
+ return FixedValue;
+ }
- SD.setLayoutOrder(i);
- for (MCSectionData::iterator it2 = SD.begin(),
- ie2 = SD.end(); it2 != ie2; ++it2) {
- it2->setLayoutOrder(FragmentIndex++);
- }
- Layout.getSectionOrder().push_back(&SD);
-
- Layout.LayoutSection(&SD);
-
- // Layout until everything fits.
- while (LayoutOnce(Layout))
- continue;
-
-}
-
-void MCAssembler::Finish(MCObjectWriter *Writer) {
+void MCAssembler::Finish() {
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
@@ -709,47 +543,23 @@ void MCAssembler::Finish(MCObjectWriter *Writer) {
// Create the layout object.
MCAsmLayout Layout(*this);
- // Insert additional align fragments for concrete sections to explicitly pad
- // the previous section to match their alignment requirements. This is for
- // 'gas' compatibility, it shouldn't strictly be necessary.
- //
- // FIXME: This may be Mach-O specific.
- for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) {
- MCSectionData *SD = Layout.getSectionOrder()[i];
-
- // Ignore sections without alignment requirements.
- unsigned Align = SD->getAlignment();
- if (Align <= 1)
- continue;
-
- // Ignore virtual sections, they don't cause file size modifications.
- if (getBackend().isVirtualSection(SD->getSection()))
- continue;
-
- // Otherwise, create a new align fragment at the end of the previous
- // section.
- MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align,
- Layout.getSectionOrder()[i - 1]);
- AF->setOnlyAlignAddress(true);
- }
-
// Create dummy fragments and assign section ordinals.
unsigned SectionIndex = 0;
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
// Create dummy fragments to eliminate any empty sections; this simplifies
// layout.
if (it->getFragmentList().empty())
- new MCFillFragment(0, 1, 0, it);
+ new MCDataFragment(it);
it->setOrdinal(SectionIndex++);
}
// Assign layout order indices to sections and fragments.
- unsigned FragmentIndex = 0;
for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
MCSectionData *SD = Layout.getSectionOrder()[i];
SD->setLayoutOrder(i);
+ unsigned FragmentIndex = 0;
for (MCSectionData::iterator it2 = SD->begin(),
ie2 = SD->end(); it2 != ie2; ++it2)
it2->setLayoutOrder(FragmentIndex++);
@@ -772,48 +582,39 @@ void MCAssembler::Finish(MCObjectWriter *Writer) {
uint64_t StartOffset = OS.tell();
- llvm::OwningPtr<MCObjectWriter> OwnWriter(0);
- if (Writer == 0) {
- //no custom Writer_ : create the default one life-managed by OwningPtr
- OwnWriter.reset(getBackend().createObjectWriter(OS));
- Writer = OwnWriter.get();
- if (!Writer)
- report_fatal_error("unable to create object writer!");
- }
-
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
- Writer->ExecutePostLayoutBinding(*this);
+ getWriter().ExecutePostLayoutBinding(*this, Layout);
// Evaluate and apply the fixups, generating relocation entries as necessary.
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
for (MCSectionData::iterator it2 = it->begin(),
ie2 = it->end(); it2 != ie2; ++it2) {
MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
- if (!DF)
- continue;
-
- for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
- ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
- MCFixup &Fixup = *it3;
-
- // Evaluate the fixup.
- MCValue Target;
- uint64_t FixedValue;
- if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) {
- // The fixup was unresolved, we need a relocation. Inform the object
- // writer of the relocation, and give it an opportunity to adjust the
- // fixup value if need be.
- Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue);
+ if (DF) {
+ for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+ ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+ getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+ DF->getContents().size(), FixedValue);
+ }
+ }
+ MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+ if (IF) {
+ for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
+ ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
+ MCFixup &Fixup = *it3;
+ uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+ getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+ IF->getCode().size(), FixedValue);
}
-
- getBackend().ApplyFixup(Fixup, *DF, FixedValue);
}
}
}
// Write the object file.
- Writer->WriteObject(*this, Layout);
+ getWriter().WriteObject(*this, Layout);
stats::ObjectBytes += OS.tell() - StartOffset;
}
@@ -852,100 +653,144 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
return false;
}
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
- ++stats::RelaxationSteps;
+bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+ MCInstFragment &IF) {
+ if (!FragmentNeedsRelaxation(&IF, Layout))
+ return false;
- // Layout the sections in order.
- Layout.LayoutFile();
+ ++stats::RelaxedInstructions;
+ // FIXME-PERF: We could immediately lower out instructions if we can tell
+ // they are fully resolved, to avoid retesting on later passes.
+
+ // Relax the fragment.
+
+ MCInst Relaxed;
+ getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+
+ // Encode the new instruction.
+ //
+ // FIXME-PERF: If it matters, we could let the target do this. It can
+ // probably do so more efficiently in many cases.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+ VecOS.flush();
+
+ // Update the instruction fragment.
+ IF.setInst(Relaxed);
+ IF.getCode() = Code;
+ IF.getFixups().clear();
+ // FIXME: Eliminate copy.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ IF.getFixups().push_back(Fixups[i]);
+
+ return true;
+}
+
+bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+ int64_t Value = 0;
+ uint64_t OldSize = LF.getContents().size();
+ LF.getValue().EvaluateAsAbsolute(Value, Layout);
+ SmallString<8> &Data = LF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ if (LF.isSigned())
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ else
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ OSE.flush();
+ return OldSize != LF.getContents().size();
+}
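// [Editor's sketch, not part of the patch] What RelaxLEB re-emits: ULEB128
// stores a value 7 bits at a time, low bits first, setting the high bit of
// every byte except the last. Because the byte count depends on the value,
// relaxation has to keep iterating until the size stops changing. Hypothetical
// helper, not MCObjectWriter::EncodeULEB128 itself.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;             // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  assert(encodeULEB128(0x7f).size() == 1);      // one byte: 0x7f
  assert(encodeULEB128(0x80).size() == 2);      // 0x80 0x01
  assert((encodeULEB128(624485) ==
          std::vector<uint8_t>{0xe5, 0x8e, 0x26}));
}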
+
+bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ int64_t LineDelta;
+ LineDelta = DF.getLineDelta();
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ MCDwarfCallFrameFragment &DF) {
+ int64_t AddrDelta = 0;
+ uint64_t OldSize = DF.getContents().size();
+ bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+ (void)IsAbs;
+ assert(IsAbs);
+ SmallString<8> &Data = DF.getContents();
+ Data.clear();
+ raw_svector_ostream OSE(Data);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+ OSE.flush();
+ return OldSize != Data.size();
+}
+
+bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+ MCSectionData &SD) {
+ MCFragment *FirstInvalidFragment = NULL;
// Scan for fragments that need relaxation.
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ // Check if this is a fragment that needs relaxation.
+ bool relaxedFrag = false;
+ switch(it2->getKind()) {
+ default:
+ break;
+ case MCFragment::FT_Inst:
+ relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ break;
+ case MCFragment::FT_Dwarf:
+ relaxedFrag = RelaxDwarfLineAddr(Layout,
+ *cast<MCDwarfLineAddrFragment>(it2));
+ break;
+ case MCFragment::FT_DwarfFrame:
+ relaxedFrag =
+ RelaxDwarfCallFrameFragment(Layout,
+ *cast<MCDwarfCallFrameFragment>(it2));
+ break;
+ case MCFragment::FT_LEB:
+ relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ break;
+ }
+ // Update the layout, and remember that we relaxed.
+ if (relaxedFrag && !FirstInvalidFragment)
+ FirstInvalidFragment = it2;
+ }
+ if (FirstInvalidFragment) {
+ Layout.Invalidate(FirstInvalidFragment);
+ return true;
+ }
+ return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+ ++stats::RelaxationSteps;
+
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
-
- for (MCSectionData::iterator it2 = SD.begin(),
- ie2 = SD.end(); it2 != ie2; ++it2) {
- // Check if this is an instruction fragment that needs relaxation.
- MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
- if (!IF || !FragmentNeedsRelaxation(IF, Layout))
- continue;
-
- ++stats::RelaxedInstructions;
-
- // FIXME-PERF: We could immediately lower out instructions if we can tell
- // they are fully resolved, to avoid retesting on later passes.
-
- // Relax the fragment.
-
- MCInst Relaxed;
- getBackend().RelaxInstruction(IF->getInst(), Relaxed);
-
- // Encode the new instruction.
- //
- // FIXME-PERF: If it matters, we could let the target do this. It can
- // probably do so more efficiently in many cases.
- SmallVector<MCFixup, 4> Fixups;
- SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
- VecOS.flush();
-
- // Update the instruction fragment.
- int SlideAmount = Code.size() - IF->getInstSize();
- IF->setInst(Relaxed);
- IF->getCode() = Code;
- IF->getFixups().clear();
- // FIXME: Eliminate copy.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
- IF->getFixups().push_back(Fixups[i]);
-
- // Update the layout, and remember that we relaxed.
- Layout.UpdateForSlide(IF, SlideAmount);
+ while(LayoutSectionOnce(Layout, SD))
WasRelaxed = true;
- }
}
return WasRelaxed;
}
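// [Editor's sketch, not part of the patch] Why LayoutOnce is driven in a loop
// until it reports no changes: a fragment's size can depend on layout offsets
// (a LEB fragment encoding an offset, for example), and growing one fragment
// shifts everything after it, which can in turn grow another. A self-contained
// toy model of that fixed-point iteration, not the MCAssembler API.
#include <cstdint>
#include <vector>

// Size of the ULEB128 encoding of Value.
static unsigned ulebSize(uint64_t Value) {
  unsigned N = 0;
  do { ++N; Value >>= 7; } while (Value);
  return N;
}

// Toy rule: every fragment encodes the section's total size as a ULEB128.
static bool relaxOnce(std::vector<unsigned> &Sizes) {
  bool Changed = false;
  for (unsigned i = 0, e = Sizes.size(); i != e; ++i) {
    uint64_t Total = 0;
    for (unsigned j = 0, je = Sizes.size(); j != je; ++j)
      Total += Sizes[j];
    unsigned New = ulebSize(Total);
    if (New != Sizes[i]) {
      Sizes[i] = New;
      Changed = true;
    }
  }
  return Changed;
}

int main() {
  std::vector<unsigned> Sizes(200, 1); // 200 one-byte fragments
  unsigned Passes = 0;
  while (relaxOnce(Sizes))
    ++Passes;                          // iterate until nothing changes
  return Passes > 0 ? 0 : 1;           // at least one pass relaxed something
}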
void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
- // Lower out any instruction fragments, to simplify the fixup application and
- // output.
- //
- // FIXME-PERF: We don't have to do this, but the assumption is that it is
- // cheap (we will mostly end up eliminating fragments and appending on to data
- // fragments), so the extra complexity downstream isn't worth it. Evaluate
- // this assumption.
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- MCSectionData &SD = *it;
-
- for (MCSectionData::iterator it2 = SD.begin(),
- ie2 = SD.end(); it2 != ie2; ++it2) {
- MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
- if (!IF)
- continue;
-
- // Create a new data fragment for the instruction.
- //
- // FIXME-PERF: Reuse previous data fragment if possible.
- MCDataFragment *DF = new MCDataFragment();
- SD.getFragmentList().insert(it2, DF);
-
- // Update the data fragments layout data.
- DF->setParent(IF->getParent());
- DF->setAtom(IF->getAtom());
- DF->setLayoutOrder(IF->getLayoutOrder());
- Layout.FragmentReplaced(IF, DF);
-
- // Copy in the data and the fixups.
- DF->getContents().append(IF->getCode().begin(), IF->getCode().end());
- for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i)
- DF->getFixups().push_back(IF->getFixups()[i]);
-
- // Delete the instruction fragment and update the iterator.
- SD.getFragmentList().erase(IF);
- it2 = DF;
- }
+ // The layout is done. Mark every fragment as valid.
+ for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+ Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
}
}
@@ -972,18 +817,19 @@ void MCFragment::dump() {
case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
case MCFragment::FT_Inst: OS << "MCInstFragment"; break;
case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+ case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+ case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
}
OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
- << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">";
+ << " Offset:" << Offset << ">";
switch (getKind()) {
case MCFragment::FT_Align: {
const MCAlignFragment *AF = cast<MCAlignFragment>(this);
if (AF->hasEmitNops())
OS << " (emit nops)";
- if (AF->hasOnlyAlignAddress())
- OS << " (only align section)";
OS << "\n ";
OS << " Alignment:" << AF->getAlignment()
<< " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
@@ -1032,6 +878,25 @@ void MCFragment::dump() {
OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
break;
}
+ case MCFragment::FT_Dwarf: {
+ const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << OF->getAddrDelta()
+ << " LineDelta:" << OF->getLineDelta();
+ break;
+ }
+ case MCFragment::FT_DwarfFrame: {
+ const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+ OS << "\n ";
+ OS << " AddrDelta:" << CF->getAddrDelta();
+ break;
+ }
+ case MCFragment::FT_LEB: {
+ const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+ OS << "\n ";
+ OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+ break;
+ }
}
OS << ">";
}
@@ -1040,8 +905,7 @@ void MCSectionData::dump() {
raw_ostream &OS = llvm::errs();
OS << "<MCSectionData";
- OS << " Alignment:" << getAlignment() << " Address:" << Address
- << " Fragments:[\n ";
+ OS << " Alignment:" << getAlignment() << " Fragments:[\n ";
for (iterator it = begin(), ie = end(); it != ie; ++it) {
if (it != begin()) OS << ",\n ";
it->dump();
diff --git a/contrib/llvm/lib/MC/MCCodeEmitter.cpp b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
index d513237..c122763 100644
--- a/contrib/llvm/lib/MC/MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/MC/MCCodeEmitter.cpp
@@ -16,15 +16,3 @@ MCCodeEmitter::MCCodeEmitter() {
MCCodeEmitter::~MCCodeEmitter() {
}
-
-const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const {
- static const MCFixupKindInfo Builtins[] = {
- { "FK_Data_1", 0, 8, 0 },
- { "FK_Data_2", 0, 16, 0 },
- { "FK_Data_4", 0, 32, 0 },
- { "FK_Data_8", 0, 64, 0 }
- };
-
- assert(Kind <= 3 && "Unknown fixup kind");
- return Builtins[Kind];
-}
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index e5586a0..018f00c 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -15,8 +15,10 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCLabel.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -24,8 +26,9 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
- CurrentDwarfLoc(0,0,0,0,0) {
+MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) :
+ MAI(mai), TAI(tai), NextUniqueID(0),
+ CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) {
MachOUniquingMap = 0;
ELFUniquingMap = 0;
COFFUniquingMap = 0;
@@ -40,7 +43,7 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
MCContext::~MCContext() {
// NOTE: The symbols are all allocated out of a bump pointer allocator,
// we don't need to free them here.
-
+
// If we have the MachO uniquing map, free it.
delete (MachOUniqueMapTy*)MachOUniquingMap;
delete (ELFUniqueMapTy*)ELFUniquingMap;
@@ -48,6 +51,8 @@ MCContext::~MCContext() {
// If the stream for the .secure_log_unique directive was created free it.
delete (raw_ostream*)SecureLog;
+
+ delete TAI;
}
//===----------------------------------------------------------------------===//
@@ -56,20 +61,42 @@ MCContext::~MCContext() {
MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
assert(!Name.empty() && "Normal symbols cannot be unnamed!");
-
- // Determine whether this is an assembler temporary or normal label.
- bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
-
+
// Do the lookup and get the entire StringMapEntry. We want access to the
// key if we are creating the entry.
StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
- if (Entry.getValue()) return Entry.getValue();
+ MCSymbol *Sym = Entry.getValue();
+
+ if (Sym)
+ return Sym;
+
+ Sym = CreateSymbol(Name);
+ Entry.setValue(Sym);
+ return Sym;
+}
+
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+ // Determine whether this is an assembler temporary or normal label.
+ bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
+ StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
+ if (NameEntry->getValue()) {
+ assert(isTemporary && "Cannot rename non temporary symbols");
+ SmallString<128> NewName;
+ do {
+ Twine T = Name + Twine(NextUniqueID++);
+ T.toVector(NewName);
+ StringRef foo = NewName;
+ NameEntry = &UsedNames.GetOrCreateValue(foo);
+ } while (NameEntry->getValue());
+ }
+ NameEntry->setValue(true);
// Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
- // to the copy of the string that is embedded in the StringMapEntry.
- MCSymbol *Result = new (*this) MCSymbol(Entry.getKey(), isTemporary);
- Entry.setValue(Result);
- return Result;
+ // to the copy of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary);
+
+ return Result;
}
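// [Editor's sketch, not part of the patch] The renaming policy above, in
// miniature: a requested temporary-symbol name that is already taken gets an
// increasing counter appended until an unused name is found, while a clash on
// a non-temporary name is a hard error. Hypothetical standalone code, not the
// MCContext API.
#include <cassert>
#include <set>
#include <string>

class SymbolNames {
  std::set<std::string> Used;
  unsigned NextUniqueID = 0;
public:
  std::string create(const std::string &Name, bool IsTemporary) {
    std::string Candidate = Name;
    while (Used.count(Candidate)) {
      assert(IsTemporary && "Cannot rename non-temporary symbols");
      Candidate = Name + std::to_string(NextUniqueID++);
    }
    Used.insert(Candidate);
    return Candidate;
  }
};

int main() {
  SymbolNames Names;
  assert(Names.create("Ltmp0", true) == "Ltmp0");
  assert(Names.create("Ltmp0", true) == "Ltmp00"); // renamed: "Ltmp0" + "0"
  assert(Names.create("Ltmp0", true) == "Ltmp01"); // renamed again
}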
MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
@@ -79,8 +106,11 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
}
MCSymbol *MCContext::CreateTempSymbol() {
- return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
- "tmp" + Twine(NextUniqueID++));
+ SmallString<128> NameSV;
+ Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" +
+ Twine(NextUniqueID++);
+ Name.toVector(NameSV);
+ return CreateSymbol(NameSV);
}
unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
@@ -123,49 +153,70 @@ const MCSectionMachO *MCContext::
getMachOSection(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2, SectionKind Kind) {
-
+
// We unique sections by their segment/section pair. The returned section
// may not have the same flags as the requested section, if so this should be
// diagnosed by the client as an error.
-
+
// Create the map if it doesn't already exist.
if (MachOUniquingMap == 0)
MachOUniquingMap = new MachOUniqueMapTy();
MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
-
+
// Form the name to look up.
SmallString<64> Name;
Name += Segment;
Name.push_back(',');
Name += Section;
-
+
// Do the lookup, if we have a hit, return it.
const MCSectionMachO *&Entry = Map[Name.str()];
if (Entry) return Entry;
-
+
// Otherwise, return a new section.
return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
Reserved2, Kind);
}
+const MCSectionELF *MCContext::
+getELFSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ return getELFSection(Section, Type, Flags, Kind, 0, "");
+}
-const MCSection *MCContext::
+const MCSectionELF *MCContext::
getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, bool IsExplicit, unsigned EntrySize) {
+ SectionKind Kind, unsigned EntrySize, StringRef Group) {
if (ELFUniquingMap == 0)
ELFUniquingMap = new ELFUniqueMapTy();
ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
-
+
// Do the lookup, if we have a hit, return it.
StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
if (Entry.getValue()) return Entry.getValue();
-
+
+ // Possibly refine the entry size first.
+ if (!EntrySize) {
+ EntrySize = MCSectionELF::DetermineEntrySize(Kind);
+ }
+
+ MCSymbol *GroupSym = NULL;
+ if (!Group.empty())
+ GroupSym = GetOrCreateSymbol(Group);
+
MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
- Kind, IsExplicit, EntrySize);
+ Kind, EntrySize, GroupSym);
Entry.setValue(Result);
return Result;
}
+const MCSectionELF *MCContext::CreateELFGroupSection() {
+ MCSectionELF *Result =
+ new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
+ SectionKind::getReadOnly(), 4, NULL);
+ return Result;
+}
+
const MCSection *MCContext::getCOFFSection(StringRef Section,
unsigned Characteristics,
int Selection,
@@ -173,15 +224,15 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
if (COFFUniquingMap == 0)
COFFUniquingMap = new COFFUniqueMapTy();
COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
-
+
// Do the lookup, if we have a hit, return it.
StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
if (Entry.getValue()) return Entry.getValue();
-
+
MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
Characteristics,
Selection, Kind);
-
+
Entry.setValue(Result);
return Result;
}
@@ -240,7 +291,7 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
// stored at MCDwarfFiles[FileNumber].Name .
DirIndex++;
}
-
+
// Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
// vector.
char *Buf = static_cast<char *>(Allocate(Name.size()));
@@ -251,15 +302,11 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
return FileNumber;
}
-/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
/// currently is assigned and false otherwise.
-bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) {
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
return false;
- MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
- if (ExistingFile)
- return true;
- else
- return false;
+ return MCDwarfFiles[FileNumber] != 0;
}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
index 697b3d9..2fd14db 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -354,7 +354,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
SourceMgr sourceMgr;
sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
- MCContext context(*AsmInfo);
+ MCContext context(*AsmInfo, NULL);
OwningPtr<MCStreamer> streamer(createNullStreamer(context));
OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
context, *streamer,
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
index e2f850b..71e45f0 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDDisassembler.h
@@ -21,7 +21,7 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <map>
#include <set>
@@ -89,8 +89,10 @@ struct EDDisassembler {
bool operator<(const CPUKey &key) const {
if(Arch > key.Arch)
return false;
- if(Syntax >= key.Syntax)
- return false;
+ else if (Arch == key.Arch) {
+ if(Syntax > key.Syntax)
+ return false;
+ }
return true;
}
};
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
index e22408f..63b049f 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.cpp
@@ -62,6 +62,8 @@ int EDInst::stringify() {
if (Disassembler.printInst(String, *Inst))
return StringifyResult.setResult(-1);
+
+ String.push_back('\n');
return StringifyResult.setResult(0);
}
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
index 39d264f..ceb9505 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDInst.h
@@ -16,7 +16,7 @@
#ifndef LLVM_EDINST_H
#define LLVM_EDINST_H
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
#include <vector>
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
index 2aed123..cfeb56f 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.cpp
@@ -260,23 +260,20 @@ int EDOperand::isMemory() {
}
#ifdef __BLOCKS__
-struct RegisterReaderWrapper {
- EDOperand::EDRegisterBlock_t regBlock;
-};
+namespace {
+ struct RegisterReaderWrapper {
+ EDOperand::EDRegisterBlock_t regBlock;
+ };
+}
-int readerWrapperCallback(uint64_t *value,
- unsigned regID,
- void *arg) {
- struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
+static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) {
+ RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg;
return wrapper->regBlock(value, regID);
}
-int EDOperand::evaluate(uint64_t &result,
- EDRegisterBlock_t regBlock) {
- struct RegisterReaderWrapper wrapper;
+int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) {
+ RegisterReaderWrapper wrapper;
wrapper.regBlock = regBlock;
- return evaluate(result,
- readerWrapperCallback,
- (void*)&wrapper);
+ return evaluate(result, readerWrapperCallback, (void*)&wrapper);
}
#endif
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
index 6e69522..50260ec 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDOperand.h
@@ -16,7 +16,7 @@
#ifndef LLVM_EDOPERAND_H
#define LLVM_EDOPERAND_H
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
index 6b2aeac..ba46707 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
+++ b/contrib/llvm/lib/MC/MCDisassembler/EDToken.h
@@ -17,7 +17,7 @@
#define LLVM_EDTOKEN_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include <string>
#include <vector>
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 2da71f9..112d7d8 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -7,11 +7,420 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
+// Given a special op, return the address skip amount (in units of
+// DWARF2_LINE_MIN_INSN_LENGTH).
+#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
+
+// The maximum address skip amount that can be encoded with a special op.
+#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
+
+// First special line opcode - leave room for the standard opcodes.
+// Note: If you want to change this, you'll have to update the
+// "standard_opcode_lengths" table that is emitted in MCDwarfFileTable::Emit().
+#define DWARF2_LINE_OPCODE_BASE 13
+
+// Minimum line offset in a special line info. opcode. This value
+// was chosen to give a reasonable range of values.
+#define DWARF2_LINE_BASE -5
+
+// Range of line offsets in a special line info. opcode.
+# define DWARF2_LINE_RANGE 14
+
+// Define the architecture-dependent minimum instruction length (in bytes).
+// This value should be rather too small than too big.
+# define DWARF2_LINE_MIN_INSN_LENGTH 1
+
+// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1, which is the current setting,
+// this routine is a nop and will be optimized away.
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta)
+{
+ if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
+ return AddrDelta;
+ if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
+ // TODO: report this error, but really only once.
+ ;
+ }
+ return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH;
+}
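// [Editor's sketch, not part of the patch] How the parameters above combine
// into a DWARF "special" line opcode: with opcode_base 13, line_base -5 and
// line_range 14, a (line delta, address delta) pair packs into one byte when
// the computed opcode fits in 0..255, which is also where the
// MAX_SPECIAL_ADDR_DELTA of (255 - 13) / 14 = 17 comes from. Illustration
// only; MCDwarfLineAddr::Encode is the real encoder.
#include <cassert>
#include <cstdint>

static const int OpcodeBase = 13; // DWARF2_LINE_OPCODE_BASE
static const int LineBase   = -5; // DWARF2_LINE_BASE
static const int LineRange  = 14; // DWARF2_LINE_RANGE

// Returns the special opcode, or -1 if the pair needs standard opcodes.
static int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
  if (LineDelta < LineBase || LineDelta >= LineBase + LineRange)
    return -1;
  int64_t Opcode =
      (LineDelta - LineBase) + (int64_t)LineRange * (int64_t)AddrDelta +
      OpcodeBase;
  return Opcode <= 255 ? (int)Opcode : -1;
}

int main() {
  assert(specialOpcode(1, 0) == 19);  // advance line by 1, address by 0
  assert(specialOpcode(2, 5) == 90);  // advance line by 2, address by 5
  assert(specialOpcode(1, 17) == -1); // an address skip of 17 no longer fits
}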
+
+//
+// This is called when an instruction is assembled into the specified section.
+// If there is information from the last .loc directive that does not yet have
+// a line entry made for it, one is made here.
+//
+void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+ if (!MCOS->getContext().getDwarfLocSeen())
+ return;
+
+ // Create a symbol in the current section for use in the line entry.
+ MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ MCOS->EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+ // Clear DwarfLocSeen to mark the current .loc info as used.
+ MCOS->getContext().ClearDwarfLocSeen();
+
+ // Get the MCLineSection for this section; if one does not exist for this
+ // section, create it.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections.lookup(Section);
+ if (!LineSection) {
+ // Create a new MCLineSection. This will be deleted after the dwarf line
+ // table is created using it by iterating through the MCLineSections
+ // DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCOS->getContext().addMCLineSection(Section, LineSection);
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry);
+}
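// [Editor's sketch, not part of the patch] The .loc/line-entry handshake above
// in isolation: a .loc directive only records pending state in the context;
// the next instruction emitted into a section consumes that state and pins it
// to a label created at the current position. Hypothetical standalone types,
// not the MCStreamer/MCContext API.
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Loc { unsigned File, Line; };

class LineTableBuilder {
  bool LocSeen = false;
  Loc Pending = {0, 0};
  std::map<std::string, std::vector<std::pair<std::string, Loc> > > Entries;
  unsigned NextLabel = 0;
public:
  // ".loc File Line" was parsed: remember it, but emit nothing yet.
  void onLocDirective(unsigned File, unsigned Line) {
    Pending.File = File;
    Pending.Line = Line;
    LocSeen = true;
  }
  // An instruction was emitted into Section: attach the pending .loc info to
  // a fresh temporary label marking the instruction's address.
  void onInstruction(const std::string &Section) {
    if (!LocSeen)
      return;
    std::string Label = "Ltmp" + std::to_string(NextLabel++);
    Entries[Section].push_back(std::make_pair(Label, Pending));
    LocSeen = false; // the current .loc info has been used
  }
};

int main() {
  LineTableBuilder B;
  B.onLocDirective(1, 42);  // .loc 1 42
  B.onInstruction(".text"); // first instruction after the .loc gets an entry
  B.onInstruction(".text"); // no pending .loc, so no new entry
}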
+
+//
+// This helper routine returns an expression of End - Start - IntVal.
+//
+static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
+ const MCSymbol &Start,
+ const MCSymbol &End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, MCOS.getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext());
+ return Res3;
+}
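// [Editor's sketch, not part of the patch] A plain-integer rendition of why
// the helper subtracts IntVal: DWARF length fields exclude themselves (and any
// header bytes emitted before the data they cover), so the emitted value is
// End - Start minus the bytes already accounted for. The constants 4 and
// (4 + 2 + 4) mirror the two uses of this helper in MCDwarfFileTable::Emit()
// further down; the label offsets are made-up numbers.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t LineStart = 0x100, ProEnd = 0x12e, LineEnd = 0x200;
  // unit length: everything after the 4-byte length field itself.
  uint64_t UnitLength = (LineEnd - LineStart) - 4;
  // prologue length: not counting the unit length (4), the version (2), or
  // the prologue length field itself (4).
  uint64_t PrologueLength = (ProEnd - LineStart) - (4 + 2 + 4);
  assert(UnitLength == 0xfc);
  assert(PrologueLength == 0x24);
}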
+
+//
+// This emits the Dwarf line table for the specified section from the entries
+// in the LineSection.
+//
+static inline void EmitDwarfLineTable(MCStreamer *MCOS,
+ const MCSection *Section,
+ const MCLineSection *LineSection) {
+ unsigned FileNum = 1;
+ unsigned LastLine = 1;
+ unsigned Column = 0;
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ unsigned Isa = 0;
+ MCSymbol *LastLabel = NULL;
+
+ // Loop through each MCLineEntry and encode the dwarf line number table.
+ for (MCLineSection::const_iterator
+ it = LineSection->getMCLineEntries()->begin(),
+ ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) {
+
+ if (FileNum != it->getFileNum()) {
+ FileNum = it->getFileNum();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1);
+ MCOS->EmitULEB128IntValue(FileNum);
+ }
+ if (Column != it->getColumn()) {
+ Column = it->getColumn();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
+ MCOS->EmitULEB128IntValue(Column);
+ }
+ if (Isa != it->getIsa()) {
+ Isa = it->getIsa();
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1);
+ MCOS->EmitULEB128IntValue(Isa);
+ }
+ if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
+ Flags = it->getFlags();
+ MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1);
+ }
+ if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1);
+ if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1);
+ if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
+ MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1);
+
+ int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine;
+ MCSymbol *Label = it->getLabel();
+
+ // At this point we want to emit/create the sequence to encode the delta in
+ // line numbers and the increment of the address between the previous Label
+ // and the current Label.
+ MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+
+ LastLine = it->getLine();
+ LastLabel = Label;
+ }
+
+ // Emit a DW_LNE_end_sequence for the end of the section.
+ // Using the pointer Section, create a temporary label at the end of the
+ // section and use that together with LastLabel to compute the address delta;
+ // use INT64_MAX as the line delta, which is the signal that this is
+ // actually a DW_LNE_end_sequence.
+
+ // Switch to the section to be able to create a symbol at its end.
+ MCOS->SwitchSection(Section);
+
+ MCContext &context = MCOS->getContext();
+ // Create a symbol at the end of the section.
+ MCSymbol *SectionEnd = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the end of the section.
+ MCOS->EmitLabel(SectionEnd);
+
+ // Switch back to the dwarf line section.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd);
+}
+
+//
+// This emits the Dwarf file and the line tables.
+//
+void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ // Switch to the section where the table will be emitted into.
+ MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
+
+ // Create a symbol at the beginning of this section.
+ MCSymbol *LineStartSym = context.CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the section.
+ MCOS->EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = context.CreateTempSymbol();
+
+ // The first 4 bytes are the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4),
+ 4);
+
+ // The next 2 bytes are the version, which is DWARF 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end
+
+ // The next 4 bytes are the length of the prologue, measured from the start
+ // of the section to the end of the prologue, not including the 4 bytes for
+ // the total length, the 2 bytes for the version, or these 4 bytes for the
+ // prologue length itself.
+ MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym,
+ (4 + 2 + 4)),
+ 4, 0);
+
+ // Parameters of the state machine are next.
+ MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_BASE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1);
+ MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1);
+
+ // Standard opcode lengths
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+ MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+ MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string
+ // the Directory num
+ MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex());
+ MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0)
+ MCOS->EmitIntValue(0, 1); // filesize (always 0)
+ }
+ MCOS->EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ MCOS->EmitLabel(ProEndSym);
+
+ // Put out the line tables.
+ const DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ MCOS->getContext().getMCLineSections();
+ const std::vector<const MCSection *> &MCLineSectionOrder =
+ MCOS->getContext().getMCLineSectionOrder();
+ for (std::vector<const MCSection*>::const_iterator it =
+ MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+ ++it) {
+ const MCSection *Sec = *it;
+ const MCLineSection *Line = MCLineSections.lookup(Sec);
+ EmitDwarfLineTable(MCOS, Sec, Line);
+
+ // Now delete the MCLineSections that were created in MCLineEntry::Make()
+ // and used to emit the line table.
+ delete Line;
+ }
+
+ if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines()
+ && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) {
+ // The darwin9 linker has a bug (see PR8715). For 32-bit architectures
+ // it requires:
+ // total_length >= prologue_length + 10
+ // We are 4 bytes short, since we have total_length = 51 and
+ // prologue_length = 45
+
+ // The regular end_sequence should be sufficient.
+ MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0);
+ }
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ MCOS->EmitLabel(LineEndSym);
+}
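
As a rough illustration of the header emitted above, here is a small self-contained sketch (not the LLVM code) that lays the same fixed-size fields into a little-endian byte buffer. The parameter values (minimum instruction length 1, default_is_stmt 1, line_base -5, line_range 14, opcode_base 13) are the usual DWARF 2 defaults and stand in for the DWARF2_LINE_* macros.

#include <cstdint>
#include <cstdio>
#include <vector>

static void append32(std::vector<uint8_t> &Buf, uint32_t V) {
  for (int i = 0; i < 4; ++i)
    Buf.push_back(uint8_t(V >> (i * 8)));       // little endian
}
static void append16(std::vector<uint8_t> &Buf, uint16_t V) {
  Buf.push_back(uint8_t(V));
  Buf.push_back(uint8_t(V >> 8));
}

static std::vector<uint8_t> buildLineHeaderSketch() {
  std::vector<uint8_t> Buf;
  append32(Buf, 0);                    // unit length, patched when complete
  append16(Buf, 2);                    // version: DWARF 2
  append32(Buf, 0);                    // prologue length, patched later
  Buf.push_back(1);                    // minimum_instruction_length
  Buf.push_back(1);                    // default_is_stmt
  Buf.push_back(uint8_t(int8_t(-5)));  // line_base
  Buf.push_back(14);                   // line_range
  Buf.push_back(13);                   // opcode_base
  const uint8_t StdOpcodeLengths[12] = {0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1};
  Buf.insert(Buf.end(), StdOpcodeLengths, StdOpcodeLengths + 12);
  // The directory table, file table, and line program would follow here.
  return Buf;
}

int main() {
  std::printf("fixed header bytes: %zu\n", buildLineHeaderSketch().size()); // 27
}
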
+
+/// Utility function to write the encoding to an object writer.
+void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ OW->WriteBytes(OS.str());
+}
+
+/// Utility function to emit the encoding to a streamer.
+void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS);
+ MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+/// Utility function to encode a Dwarf LineDelta/AddrDelta pair.
+void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
+ raw_ostream &OS) {
+ uint64_t Temp, Opcode;
+ bool NeedCopy = false;
+
+ // Scale the address delta by the minimum instruction length.
+ AddrDelta = ScaleAddrDelta(AddrDelta);
+
+ // A LineDelta of INT64_MAX is a signal that this is actually a
+ // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the
+ // end_sequence to emit the matrix entry.
+ if (LineDelta == INT64_MAX) {
+ if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ else {
+ OS << char(dwarf::DW_LNS_advance_pc);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+ OS << OSE.str();
+ }
+ OS << char(dwarf::DW_LNS_extended_op);
+ OS << char(1);
+ OS << char(dwarf::DW_LNE_end_sequence);
+ return;
+ }
+
+ // Bias the line delta by the base.
+ Temp = LineDelta - DWARF2_LINE_BASE;
+
+ // If the line increment is out of range of a special opcode, we must encode
+ // it with DW_LNS_advance_line.
+ if (Temp >= DWARF2_LINE_RANGE) {
+ OS << char(dwarf::DW_LNS_advance_line);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
+ OS << OSE.str();
+
+ LineDelta = 0;
+ Temp = 0 - DWARF2_LINE_BASE;
+ NeedCopy = true;
+ }
+
+ // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
+ if (LineDelta == 0 && AddrDelta == 0) {
+ OS << char(dwarf::DW_LNS_copy);
+ return;
+ }
+
+ // Bias the opcode by the special opcode base.
+ Temp += DWARF2_LINE_OPCODE_BASE;
+
+ // Avoid overflow when addr_delta is large.
+ if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) {
+ // Try using a special opcode.
+ Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(Opcode);
+ return;
+ }
+
+ // Try using DW_LNS_const_add_pc followed by special op.
+ Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE;
+ if (Opcode <= 255) {
+ OS << char(dwarf::DW_LNS_const_add_pc);
+ OS << char(Opcode);
+ return;
+ }
+ }
+
+ // Otherwise use DW_LNS_advance_pc.
+ OS << char(dwarf::DW_LNS_advance_pc);
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
+ OS << OSE.str();
+
+ if (NeedCopy)
+ OS << char(dwarf::DW_LNS_copy);
+ else
+ OS << char(Temp);
+}
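
A worked example of the special-opcode arithmetic used above. The constants stand in for the DWARF2_LINE_* macros; -5, 14 and 13 are the usual DWARF 2 defaults, so treat the exact numbers as illustrative. AddrDelta is assumed to already be scaled by the minimum instruction length, as in ScaleAddrDelta.

#include <cstdint>
#include <cstdio>
#include <optional>

constexpr int LineBase   = -5;   // DWARF2_LINE_BASE
constexpr int LineRange  = 14;   // DWARF2_LINE_RANGE
constexpr int OpcodeBase = 13;   // DWARF2_LINE_OPCODE_BASE

// Returns the special opcode for (LineDelta, AddrDelta) if one exists.
std::optional<uint8_t> specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
  int64_t Adj = LineDelta - LineBase;
  if (Adj < 0 || Adj >= LineRange)
    return std::nullopt;             // needs DW_LNS_advance_line first
  uint64_t Opcode = uint64_t(OpcodeBase) + uint64_t(Adj) +
                    uint64_t(LineRange) * AddrDelta;
  if (Opcode > 255)
    return std::nullopt;             // needs advance_pc or const_add_pc
  return uint8_t(Opcode);
}

int main() {
  // line +1, address +4: 13 + (1 - (-5)) + 14 * 4 = 75
  if (auto Op = specialOpcode(1, 4))
    std::printf("special opcode: %u\n", unsigned(*Op));
}
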
+
void MCDwarfFile::print(raw_ostream &OS) const {
OS << '"' << getName() << '"';
}
@@ -19,3 +428,387 @@ void MCDwarfFile::print(raw_ostream &OS) const {
void MCDwarfFile::dump() const {
print(dbgs());
}
+
+static int getDataAlignmentFactor(MCStreamer &streamer) {
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ int size = asmInfo.getPointerSize();
+ if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ return size;
+ else
+ return -size;
+}
+
+static void EmitCFIInstruction(MCStreamer &Streamer,
+ const MCCFIInstruction &Instr) {
+ int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+
+ switch (Instr.getOperation()) {
+ case MCCFIInstruction::Move: {
+ const MachineLocation &Dst = Instr.getDestination();
+ const MachineLocation &Src = Instr.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ assert(!Src.isReg() && "Machine move not supported yet.");
+
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
+ } else {
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+ Streamer.EmitULEB128IntValue(Src.getReg());
+ }
+
+ Streamer.EmitULEB128IntValue(-Src.getOffset(), 1);
+ return;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+ Streamer.EmitULEB128IntValue(Dst.getReg());
+ return;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset() / dataAlignmentFactor;
+
+ if (Offset < 0) {
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+ Streamer.EmitULEB128IntValue(Reg);
+ Streamer.EmitSLEB128IntValue(Offset);
+ } else if (Reg < 64) {
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+ Streamer.EmitULEB128IntValue(Offset, 1);
+ } else {
+ Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+ Streamer.EmitULEB128IntValue(Reg, 1);
+ Streamer.EmitULEB128IntValue(Offset, 1);
+ }
+ return;
+ }
+ case MCCFIInstruction::Remember:
+ Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
+ return;
+ case MCCFIInstruction::Restore:
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
+ return;
+ }
+ llvm_unreachable("Unhandled case in switch");
+}
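
A small sketch of the offset factoring applied above for register-save moves; the pointer size and offsets are made-up values, and the DW_CFA_offset form mentioned is the one-byte (0x80 | reg) opcode followed by the factored offset as a ULEB128.

#include <cstdio>

int main() {
  const int PointerSize = 8;
  const int DataAlignmentFactor = -PointerSize;  // stack grows down
  int ByteOffset = -16;                          // register saved at CFA-16
  int Factored = ByteOffset / DataAlignmentFactor;
  // Factored == 2 and is non-negative, so a small register number can use
  // the compact DW_CFA_offset form: (0x80 | reg) followed by ULEB128(2).
  std::printf("factored offset = %d\n", Factored);
}
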
+
+/// EmitCFIInstructions - Emit frame instructions to describe the layout of the
+/// frame.
+static void EmitCFIInstructions(MCStreamer &streamer,
+ const std::vector<MCCFIInstruction> &Instrs,
+ MCSymbol *BaseLabel) {
+ for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
+ const MCCFIInstruction &Instr = Instrs[i];
+ MCSymbol *Label = Instr.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ // Advance row if new location.
+ if (BaseLabel && Label) {
+ MCSymbol *ThisSym = Label;
+ if (ThisSym != BaseLabel) {
+ streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ BaseLabel = ThisSym;
+ }
+ }
+
+ EmitCFIInstruction(streamer, Instr);
+ }
+}
+
+static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
+ unsigned symbolEncoding) {
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ unsigned format = symbolEncoding & 0x0f;
+ unsigned application = symbolEncoding & 0x70;
+ unsigned size;
+ switch (format) {
+ default:
+ assert(0 && "Unknown Encoding");
+ case dwarf::DW_EH_PE_absptr:
+ case dwarf::DW_EH_PE_signed:
+ size = asmInfo.getPointerSize();
+ break;
+ case dwarf::DW_EH_PE_udata2:
+ case dwarf::DW_EH_PE_sdata2:
+ size = 2;
+ break;
+ case dwarf::DW_EH_PE_udata4:
+ case dwarf::DW_EH_PE_sdata4:
+ size = 4;
+ break;
+ case dwarf::DW_EH_PE_udata8:
+ case dwarf::DW_EH_PE_sdata8:
+ size = 8;
+ break;
+ }
+ switch (application) {
+ default:
+ assert(0 && "Unknown Encoding");
+ break;
+ case 0:
+ streamer.EmitSymbolValue(&symbol, size);
+ break;
+ case dwarf::DW_EH_PE_pcrel:
+ streamer.EmitPCRelSymbolValue(&symbol, size);
+ break;
+ }
+}
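
For reference, a sketch of how a DW_EH_PE encoding byte splits into the format nibble (masked with 0x0f) and the application bits (masked with 0x70), using the standard DW_EH_PE_* values; the pcrel|sdata4 combination is the same one used below for the FDE pointer encoding.

#include <cstdint>
#include <cstdio>

// Standard DW_EH_PE_* values.
enum : uint8_t {
  DW_EH_PE_sdata4 = 0x0B,
  DW_EH_PE_pcrel  = 0x10,
};

int main() {
  uint8_t Encoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4;  // 0x1B
  unsigned Format      = Encoding & 0x0f;  // 0x0B -> 4-byte signed data
  unsigned Application = Encoding & 0x70;  // 0x10 -> PC-relative
  std::printf("format=0x%x application=0x%x\n", Format, Application);
}
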
+
+static const MachineLocation TranslateMachineLocation(
+ const TargetAsmInfo &AsmInfo,
+ const MachineLocation &Loc) {
+ unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
+ MachineLocation::VirtualFP :
+ unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
+ const MachineLocation &NewLoc = Loc.isReg() ?
+ MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
+ return NewLoc;
+}
+
+static const MCSymbol &EmitCIE(MCStreamer &streamer,
+ const MCSymbol *personality,
+ unsigned personalityEncoding,
+ const MCSymbol *lsda,
+ unsigned lsdaEncoding) {
+ MCContext &context = streamer.getContext();
+ const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ const MCSection &section = *asmInfo.getEHFrameSection();
+ streamer.SwitchSection(&section);
+ MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol();
+ MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol();
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
+ *sectionEnd, 4);
+ streamer.EmitLabel(sectionStart);
+ streamer.EmitValue(Length, 4);
+
+ // CIE ID
+ streamer.EmitIntValue(0, 4);
+
+ // Version
+ streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+
+ // Augmentation String
+ SmallString<8> Augmentation;
+ Augmentation += "z";
+ if (personality)
+ Augmentation += "P";
+ if (lsda)
+ Augmentation += "L";
+ Augmentation += "R";
+ streamer.EmitBytes(Augmentation.str(), 0);
+ streamer.EmitIntValue(0, 1);
+
+ // Code Alignment Factor
+ streamer.EmitULEB128IntValue(1);
+
+ // Data Alignment Factor
+ streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+
+ // Return Address Register
+ streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true));
+
+ // Augmentation Data Length (optional)
+ MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
+ MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
+ const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
+ *augmentationStart,
+ *augmentationEnd, 0);
+ streamer.EmitULEB128Value(augmentationLength);
+
+ // Augmentation Data (optional)
+ streamer.EmitLabel(augmentationStart);
+ if (personality) {
+ // Personality Encoding
+ streamer.EmitIntValue(personalityEncoding, 1);
+ // Personality
+ EmitSymbol(streamer, *personality, personalityEncoding);
+ }
+ if (lsda) {
+ // LSDA Encoding
+ streamer.EmitIntValue(lsdaEncoding, 1);
+ }
+ // Encoding of the FDE pointers
+ streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
+ streamer.EmitLabel(augmentationEnd);
+
+ // Initial Instructions
+
+ const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState();
+ std::vector<MCCFIInstruction> Instructions;
+
+ for (int i = 0, n = Moves.size(); i != n; ++i) {
+ MCSymbol *Label = Moves[i].getLabel();
+ const MachineLocation &Dst =
+ TranslateMachineLocation(asmInfo, Moves[i].getDestination());
+ const MachineLocation &Src =
+ TranslateMachineLocation(asmInfo, Moves[i].getSource());
+ MCCFIInstruction Inst(Label, Dst, Src);
+ Instructions.push_back(Inst);
+ }
+
+ EmitCFIInstructions(streamer, Instructions, NULL);
+
+ // Padding
+ streamer.EmitValueToAlignment(4);
+
+ streamer.EmitLabel(sectionEnd);
+ return *sectionStart;
+}
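
A sketch of the augmentation-string convention followed above: 'z' says an augmentation data length is present, 'P' a personality routine, 'L' an LSDA encoding, and 'R' the FDE pointer encoding.

#include <iostream>
#include <string>

std::string buildAugmentation(bool HasPersonality, bool HasLsda) {
  std::string Aug = "z";
  if (HasPersonality)
    Aug += "P";
  if (HasLsda)
    Aug += "L";
  Aug += "R";
  return Aug;
}

int main() {
  std::cout << buildAugmentation(true, true) << "\n";  // prints zPLR
}
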
+
+static MCSymbol *EmitFDE(MCStreamer &streamer,
+ const MCSymbol &cieStart,
+ const MCDwarfFrameInfo &frame) {
+ MCContext &context = streamer.getContext();
+ MCSymbol *fdeStart = context.CreateTempSymbol();
+ MCSymbol *fdeEnd = context.CreateTempSymbol();
+
+ // Length
+ const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+ streamer.EmitValue(Length, 4);
+
+ streamer.EmitLabel(fdeStart);
+ // CIE Pointer
+ const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
+ 0);
+ streamer.EmitValue(offset, 4);
+
+ // PC Begin
+ streamer.EmitPCRelSymbolValue(frame.Begin, 4);
+
+ // PC Range
+ const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
+ *frame.End, 0);
+ streamer.EmitValue(Range, 4);
+
+ // Augmentation Data Length
+ MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol();
+ MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol();
+ const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer,
+ *augmentationStart,
+ *augmentationEnd, 0);
+ streamer.EmitULEB128Value(augmentationLength);
+
+ // Augmentation Data
+ streamer.EmitLabel(augmentationStart);
+ if (frame.Lsda)
+ EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding);
+ streamer.EmitLabel(augmentationEnd);
+ // Call Frame Instructions
+
+ EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+
+ // Padding
+ streamer.EmitValueToAlignment(4);
+
+ return fdeEnd;
+}
+
+namespace {
+ struct CIEKey {
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+
+ CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
+ unsigned LsdaEncoding_) : Personality(Personality_),
+ PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_) {
+ }
+ const MCSymbol* Personality;
+ unsigned PersonalityEncoding;
+ unsigned LsdaEncoding;
+ };
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<CIEKey> {
+ static CIEKey getEmptyKey() {
+ return CIEKey::getEmptyKey();
+ }
+ static CIEKey getTombstoneKey() {
+ return CIEKey::getTombstoneKey();
+ }
+ static unsigned getHashValue(const CIEKey &Key) {
+ FoldingSetNodeID ID;
+ ID.AddPointer(Key.Personality);
+ ID.AddInteger(Key.PersonalityEncoding);
+ ID.AddInteger(Key.LsdaEncoding);
+ return ID.ComputeHash();
+ }
+ static bool isEqual(const CIEKey &LHS,
+ const CIEKey &RHS) {
+ return LHS.Personality == RHS.Personality &&
+ LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+ LHS.LsdaEncoding == RHS.LsdaEncoding;
+ }
+ };
+}
+
+void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) {
+ const MCContext &context = streamer.getContext();
+ const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ MCSymbol *fdeEnd = NULL;
+ DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+
+ for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
+ CIEKey key(frame.Personality, frame.PersonalityEncoding,
+ frame.LsdaEncoding);
+ const MCSymbol *&cieStart = CIEStarts[key];
+ if (!cieStart)
+ cieStart = &EmitCIE(streamer, frame.Personality,
+ frame.PersonalityEncoding, frame.Lsda,
+ frame.LsdaEncoding);
+ fdeEnd = EmitFDE(streamer, *cieStart, frame);
+ if (i != n - 1)
+ streamer.EmitLabel(fdeEnd);
+ }
+
+ streamer.EmitValueToAlignment(asmInfo.getPointerSize());
+ if (fdeEnd)
+ streamer.EmitLabel(fdeEnd);
+}
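
A toy model of the CIE-sharing policy implemented above, using plain standard-library types rather than the MC classes: frames that agree on the (personality, personality encoding, LSDA encoding) key reuse one CIE. The key values shown are made up for illustration.

#include <cstdio>
#include <map>
#include <string>
#include <tuple>
#include <vector>

int main() {
  using Key = std::tuple<std::string, unsigned, unsigned>;
  std::vector<Key> Frames = {
      {"__gxx_personality_v0", 0x9B, 0x1B},
      {"__gxx_personality_v0", 0x9B, 0x1B},  // same key: reuses the first CIE
      {"", 0xFF, 0xFF},                      // no personality: its own CIE
  };
  std::map<Key, int> CIEStarts;
  int NextCIE = 0;
  for (const Key &K : Frames) {
    auto It = CIEStarts.find(K);
    int CIE = (It != CIEStarts.end()) ? It->second : (CIEStarts[K] = NextCIE++);
    std::printf("frame uses CIE %d\n", CIE);
  }
}
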
+
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+ uint64_t AddrDelta) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS);
+ Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0);
+}
+
+void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta,
+ raw_ostream &OS) {
+ // FIXME: Assumes the code alignment factor is 1.
+ if (AddrDelta == 0) {
+ } else if (isUIntN(6, AddrDelta)) {
+ uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
+ OS << Opcode;
+ } else if (isUInt<8>(AddrDelta)) {
+ OS << uint8_t(dwarf::DW_CFA_advance_loc1);
+ OS << uint8_t(AddrDelta);
+ } else if (isUInt<16>(AddrDelta)) {
+ // FIXME: Check what the correct behavior is on a big-endian machine.
+ OS << uint8_t(dwarf::DW_CFA_advance_loc2);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ } else {
+ // FIXME: Check what the correct behavior is on a big-endian machine.
+ assert(isUInt<32>(AddrDelta));
+ OS << uint8_t(dwarf::DW_CFA_advance_loc4);
+ OS << uint8_t( AddrDelta & 0xff);
+ OS << uint8_t((AddrDelta >> 8) & 0xff);
+ OS << uint8_t((AddrDelta >> 16) & 0xff);
+ OS << uint8_t((AddrDelta >> 24) & 0xff);
+
+ }
+}
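
A standalone sketch of the opcode selection above, assuming (as the FIXME notes) a code alignment factor of 1 and little-endian operand bytes; the DW_CFA_* values are the standard ones.

#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<uint8_t> encodeAdvanceLoc(uint64_t Delta) {
  const uint8_t DW_CFA_advance_loc  = 0x40;  // delta packed into the low 6 bits
  const uint8_t DW_CFA_advance_loc1 = 0x02;
  const uint8_t DW_CFA_advance_loc2 = 0x03;
  const uint8_t DW_CFA_advance_loc4 = 0x04;
  std::vector<uint8_t> Out;
  if (Delta == 0)
    return Out;                              // nothing to advance
  if (Delta < 64) {
    Out.push_back(DW_CFA_advance_loc | uint8_t(Delta));
  } else if (Delta <= 0xff) {
    Out.push_back(DW_CFA_advance_loc1);
    Out.push_back(uint8_t(Delta));
  } else if (Delta <= 0xffff) {
    Out.push_back(DW_CFA_advance_loc2);
    Out.push_back(uint8_t(Delta));
    Out.push_back(uint8_t(Delta >> 8));
  } else {
    Out.push_back(DW_CFA_advance_loc4);
    for (int i = 0; i < 4; ++i)
      Out.push_back(uint8_t(Delta >> (8 * i)));
  }
  return Out;
}

int main() {
  for (uint8_t B : encodeAdvanceLoc(300))   // prints: 03 2c 01
    std::printf("%02x ", B);
  std::printf("\n");
}
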
diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
new file mode 100644
index 0000000..12a02a9
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp
@@ -0,0 +1,23 @@
+//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
+ Triple::OSType OSType_,
+ uint16_t EMachine_,
+ bool HasRelocationAddend_)
+ : OSType(OSType_), EMachine(EMachine_),
+ HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
+}
+
+MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+}
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index 570c391..e49074d 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -23,19 +24,51 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
namespace {
+static void SetBinding(MCSymbolData &SD, unsigned Binding) {
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift);
+ SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift));
+}
+
+static unsigned GetBinding(const MCSymbolData &SD) {
+ uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift;
+ assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL ||
+ Binding == ELF::STB_WEAK);
+ return Binding;
+}
+
+static void SetType(MCSymbolData &SD, unsigned Type) {
+ assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
+ Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
+ Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
+ Type == ELF::STT_TLS);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
+ SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
+}
+
+static void SetVisibility(MCSymbolData &SD, unsigned Visibility) {
+ assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
+ Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+
+ uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift);
+ SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift));
+}
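
A sketch of the flag packing these helpers perform on the symbol-data flags word. The shift positions here are assumptions for illustration; the real ones are the ELF_STT_Shift/ELF_STB_Shift/ELF_STV_Shift constants defined elsewhere in MC, while the STB/STT/STV values below are the standard ELF ones.

#include <cstdint>
#include <cstdio>

constexpr unsigned STT_Shift = 0;  // assumed positions, for illustration only
constexpr unsigned STB_Shift = 4;
constexpr unsigned STV_Shift = 8;

uint32_t setField(uint32_t Flags, unsigned Shift, unsigned Value) {
  return (Flags & ~(0xfu << Shift)) | (Value << Shift);
}

int main() {
  const unsigned STB_GLOBAL = 1, STT_FUNC = 2, STV_HIDDEN = 2;  // ELF values
  uint32_t Flags = 0;
  Flags = setField(Flags, STB_Shift, STB_GLOBAL);
  Flags = setField(Flags, STT_Shift, STT_FUNC);
  Flags = setField(Flags, STV_Shift, STV_HIDDEN);
  std::printf("flags = 0x%x\n", Flags);  // 0x212
}
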
+
class MCELFStreamer : public MCObjectStreamer {
- void EmitInstToFragment(const MCInst &Inst);
- void EmitInstToData(const MCInst &Inst);
public:
MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
@@ -46,9 +79,13 @@ public:
/// @name MCStreamer Interface
/// @{
+ virtual void InitSections();
+ virtual void ChangeSection(const MCSection *Section);
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
assert(0 && "ELF doesn't support this directive");
@@ -76,9 +113,8 @@ public:
SD.setSize(Value);
}
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
- assert(0 && "ELF doesn't support this directive");
- }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {
assert(0 && "ELF doesn't support this directive");
@@ -88,49 +124,84 @@ public:
assert(0 && "ELF doesn't support this directive");
}
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
- virtual void EmitGPRel32Value(const MCExpr *Value) {
- assert(0 && "ELF doesn't support this directive");
- }
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n");
- }
- virtual void EmitInstruction(const MCInst &Inst);
virtual void Finish();
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
/// @}
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+ }
+
+ void SetSectionData() {
+ SetSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionText() {
+ SetSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionBss() {
+ SetSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
};
} // end anonymous namespace.
+void MCELFStreamer::InitSections() {
+ // This emulates the behavior of GNU as, which makes it easier to compare
+ // the output since the major sections come out in the same order.
+ SetSectionText();
+ SetSectionData();
+ SetSectionBss();
+ SetSectionText();
+}
+
void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- // FIXME: This is wasteful, we don't necessarily need to create a data
- // fragment. Instead, we should mark the symbol as pointing into the data
- // fragment if it exists, otherwise we should just queue the label and set its
- // fragment pointer when we emit the next fragment.
- MCDataFragment *F = getOrCreateDataFragment();
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
- SD.setFragment(F);
- SD.setOffset(F->getContents().size());
+ MCObjectStreamer::EmitLabel(Symbol);
- Symbol->setSection(*CurSection);
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol->getSection());
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
+ if (Section.getFlags() & ELF::SHF_TLS)
+ SetType(SD, ELF::STT_TLS);
}
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
@@ -139,6 +210,10 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
assert(0 && "invalid assembler flag!");
}
+void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ // FIXME: Anything needed here to flag the function as thumb?
+}
+
void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
// TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
// MCObjectStreamer.
@@ -147,6 +222,21 @@ void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
Symbol->setVariableValue(AddValueSymbols(Value));
}
+void MCELFStreamer::ChangeSection(const MCSection *Section) {
+ const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
+ if (Grp)
+ getAssembler().getOrCreateSymbolData(*Grp);
+ this->MCObjectStreamer::ChangeSection(Section);
+}
+
+void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias);
+ AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext());
+ Alias->setVariableValue(Value);
+}
+
void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
// Indirect symbols are handled differently, to match how 'as' handles
@@ -176,6 +266,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_LazyReference:
case MCSA_Reference:
case MCSA_NoDeadStrip:
+ case MCSA_SymbolResolver:
case MCSA_PrivateExtern:
case MCSA_WeakDefinition:
case MCSA_WeakDefAutoPrivate:
@@ -185,50 +276,59 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
assert(0 && "Invalid symbol attribute for ELF!");
break;
+ case MCSA_ELF_TypeGnuUniqueObject:
+ // Ignore for now.
+ break;
+
case MCSA_Global:
- SD.setFlags(SD.getFlags() | ELF_STB_Global);
+ SetBinding(SD, ELF::STB_GLOBAL);
SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
break;
case MCSA_WeakReference:
case MCSA_Weak:
- SD.setFlags(SD.getFlags() | ELF_STB_Weak);
+ SetBinding(SD, ELF::STB_WEAK);
+ SD.setExternal(true);
+ BindingExplicitlySet.insert(Symbol);
break;
case MCSA_Local:
- SD.setFlags(SD.getFlags() | ELF_STB_Local);
+ SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
break;
case MCSA_ELF_TypeFunction:
- SD.setFlags(SD.getFlags() | ELF_STT_Func);
+ SetType(SD, ELF::STT_FUNC);
break;
case MCSA_ELF_TypeObject:
- SD.setFlags(SD.getFlags() | ELF_STT_Object);
+ SetType(SD, ELF::STT_OBJECT);
break;
case MCSA_ELF_TypeTLS:
- SD.setFlags(SD.getFlags() | ELF_STT_Tls);
+ SetType(SD, ELF::STT_TLS);
break;
case MCSA_ELF_TypeCommon:
- SD.setFlags(SD.getFlags() | ELF_STT_Common);
+ SetType(SD, ELF::STT_COMMON);
break;
case MCSA_ELF_TypeNoType:
- SD.setFlags(SD.getFlags() | ELF_STT_Notype);
+ SetType(SD, ELF::STT_NOTYPE);
break;
case MCSA_Protected:
- SD.setFlags(SD.getFlags() | ELF_STV_Protected);
+ SetVisibility(SD, ELF::STV_PROTECTED);
break;
case MCSA_Hidden:
- SD.setFlags(SD.getFlags() | ELF_STV_Hidden);
+ SetVisibility(SD, ELF::STV_HIDDEN);
break;
case MCSA_Internal:
- SD.setFlags(SD.getFlags() | ELF_STV_Internal);
+ SetVisibility(SD, ELF::STV_INTERNAL);
break;
}
}
@@ -237,24 +337,38 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) {
+ if (!BindingExplicitlySet.count(Symbol)) {
+ SetBinding(SD, ELF::STB_GLOBAL);
+ SD.setExternal(true);
+ }
+
+ SetType(SD, ELF::STT_OBJECT);
+
+ if (GetBinding(SD) == ELF_STB_Local) {
const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
- MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |
- MCSectionELF::SHF_ALLOC,
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
SectionKind::getBSS());
-
- MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
- MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
- SD.setFragment(F);
Symbol->setSection(*Section);
- SD.setSize(MCConstantExpr::Create(Size, getContext()));
+
+ struct LocalCommon L = {&SD, Size, ByteAlignment};
+ LocalCommons.push_back(L);
+ } else {
+ SD.setCommon(Size, ByteAlignment);
}
- SD.setFlags(SD.getFlags() | ELF_STB_Global);
- SD.setExternal(true);
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+}
- SD.setCommon(Size, ByteAlignment);
+void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ // FIXME: Should this be caught and done earlier?
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SetBinding(SD, ELF::STB_LOCAL);
+ SD.setExternal(false);
+ BindingExplicitlySet.insert(Symbol);
+ // FIXME: ByteAlignment is not needed here, but EmitCommonSymbol requires it.
+ EmitCommonSymbol(Symbol, Size, 1);
}
void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -263,25 +377,6 @@ void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
-void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
- // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
- // MCObjectStreamer.
- MCDataFragment *DF = getOrCreateDataFragment();
-
- // Avoid fixups when possible.
- int64_t AbsValue;
- if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
- // FIXME: Endianness assumption.
- for (unsigned i = 0; i != Size; ++i)
- DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
- } else {
- DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value),
- MCFixup::getKindForSize(Size)));
- DF->getContents().resize(DF->getContents().size() + Size, 0);
- }
-}
-
void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit) {
@@ -312,18 +407,11 @@ void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
-void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
- // TODO: This is exactly the same as MCMachOStreamer. Consider merging into
- // MCObjectStreamer.
- new MCOrgFragment(*Offset, Value, getCurrentSectionData());
-}
-
// Add a symbol for the file name of this module. This is the second
// entry in the module's symbol table (the first being the null symbol).
void MCELFStreamer::EmitFileDirective(StringRef Filename) {
MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
- Symbol->setSection(*CurSection);
+ Symbol->setSection(*getCurrentSection());
Symbol->setAbsolute();
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -331,21 +419,52 @@ void MCELFStreamer::EmitFileDirective(StringRef Filename) {
SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
}
-void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Constant:
+ break;
- // Add the fixups and data.
- //
- // FIXME: Revisit this design decision when relaxation is done, we may be
- // able to get away with not storing any extra data in the MCInst.
- SmallVector<MCFixup, 4> Fixups;
- SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
- VecOS.flush();
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSGD:
+ case MCSymbolRefExpr::VK_TLSLDM:
+ case MCSymbolRefExpr::VK_TPOFF:
+ case MCSymbolRefExpr::VK_DTPOFF:
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ case MCSymbolRefExpr::VK_TLSLD:
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ break;
+ }
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
+ SetType(SD, ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ this->MCObjectStreamer::EmitInstToFragment(Inst);
+ MCInstFragment &F = *cast<MCInstFragment>(getCurrentFragment());
- IF->getCode() = Code;
- IF->getFixups() = Fixups;
+ for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i)
+ fixSymbolsInTLSFixups(F.getFixups()[i].getValue());
}
void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
@@ -357,6 +476,9 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ fixSymbolsInTLSFixups(Fixups[i].getValue());
+
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
@@ -365,44 +487,40 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCELFStreamer::EmitInstruction(const MCInst &Inst) {
- // Scan for values.
- for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
+void MCELFStreamer::Finish() {
+ if (getNumFrameInfos())
+ MCDwarfFrameEmitter::Emit(*this);
- getCurrentSectionData()->setHasInstructions(true);
+ for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
+ e = LocalCommons.end();
+ i != e; ++i) {
+ MCSymbolData *SD = i->SD;
+ uint64_t Size = i->Size;
+ unsigned ByteAlignment = i->ByteAlignment;
+ const MCSymbol &Symbol = SD->getSymbol();
+ const MCSection &Section = Symbol.getSection();
- // If this instruction doesn't need relaxation, just emit it as data.
- if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
- EmitInstToData(Inst);
- return;
- }
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section);
+ new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData);
- // Otherwise, if we are relaxing everything, relax the instruction as much as
- // possible and emit it as data.
- if (getAssembler().getRelaxAll()) {
- MCInst Relaxed;
- getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
- while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
- getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
- EmitInstToData(Relaxed);
- return;
- }
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD->setFragment(F);
- // Otherwise emit to a separate fragment.
- EmitInstToFragment(Inst);
-}
+ // Update the maximum alignment of the section if necessary.
+ if (ByteAlignment > SectData.getAlignment())
+ SectData.setAlignment(ByteAlignment);
+ }
-void MCELFStreamer::Finish() {
- getAssembler().Finish();
+ this->MCObjectStreamer::Finish();
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack) {
MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
return S;
}
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index 343f334..54d3743 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -38,21 +38,31 @@ void MCExpr::print(raw_ostream &OS) const {
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
const MCSymbol &Sym = SRE.getSymbol();
+ // Parenthesize names that start with $ so that they don't look like
+ // absolute names.
+ bool UseParens = Sym.getName()[0] == '$';
- if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
+ if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_HA16 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_PPC_LO16) {
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ UseParens = true;
+ }
- // Parenthesize names that start with $ so that they don't look like
- // absolute names.
- if (Sym.getName()[0] == '$')
+ if (UseParens)
OS << '(' << Sym << ')';
else
OS << Sym;
- if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
- SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
- SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+ else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_HA16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_PPC_LO16)
OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
return;
@@ -172,12 +182,23 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_GOTTPOFF: return "GOTTPOFF";
case VK_INDNTPOFF: return "INDNTPOFF";
case VK_NTPOFF: return "NTPOFF";
+ case VK_GOTNTPOFF: return "GOTNTPOFF";
case VK_PLT: return "PLT";
case VK_TLSGD: return "TLSGD";
+ case VK_TLSLD: return "TLSLD";
+ case VK_TLSLDM: return "TLSLDM";
case VK_TPOFF: return "TPOFF";
- case VK_ARM_HI16: return ":upper16:";
- case VK_ARM_LO16: return ":lower16:";
+ case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
+ case VK_ARM_PLT: return "(PLT)";
+ case VK_ARM_GOT: return "(GOT)";
+ case VK_ARM_GOTOFF: return "(GOTOFF)";
+ case VK_ARM_TPOFF: return "(tpoff)";
+ case VK_ARM_GOTTPOFF: return "(gottpoff)";
+ case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_PPC_TOC: return "toc";
+ case VK_PPC_HA16: return "ha16";
+ case VK_PPC_LO16: return "lo16";
}
}
@@ -185,15 +206,33 @@ MCSymbolRefExpr::VariantKind
MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
return StringSwitch<VariantKind>(Name)
.Case("GOT", VK_GOT)
+ .Case("got", VK_GOT)
.Case("GOTOFF", VK_GOTOFF)
+ .Case("gotoff", VK_GOTOFF)
.Case("GOTPCREL", VK_GOTPCREL)
+ .Case("gotpcrel", VK_GOTPCREL)
.Case("GOTTPOFF", VK_GOTTPOFF)
+ .Case("gottpoff", VK_GOTTPOFF)
.Case("INDNTPOFF", VK_INDNTPOFF)
+ .Case("indntpoff", VK_INDNTPOFF)
.Case("NTPOFF", VK_NTPOFF)
+ .Case("ntpoff", VK_NTPOFF)
+ .Case("GOTNTPOFF", VK_GOTNTPOFF)
+ .Case("gotntpoff", VK_GOTNTPOFF)
.Case("PLT", VK_PLT)
+ .Case("plt", VK_PLT)
.Case("TLSGD", VK_TLSGD)
+ .Case("tlsgd", VK_TLSGD)
+ .Case("TLSLD", VK_TLSLD)
+ .Case("tlsld", VK_TLSLD)
+ .Case("TLSLDM", VK_TLSLDM)
+ .Case("tlsldm", VK_TLSLDM)
.Case("TPOFF", VK_TPOFF)
+ .Case("tpoff", VK_TPOFF)
+ .Case("DTPOFF", VK_DTPOFF)
+ .Case("dtpoff", VK_DTPOFF)
.Case("TLVP", VK_TLVP)
+ .Case("tlvp", VK_TLVP)
.Default(VK_Invalid);
}
@@ -203,7 +242,28 @@ void MCTargetExpr::Anchor() {}
/* *** */
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+ return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout,
+ const SectionAddrMap &Addrs) const {
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
+ return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs) const {
MCValue Value;
// Fast path constants.
@@ -212,37 +272,159 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
return true;
}
- if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute())
- return false;
+ // FIXME: The use of InSet = Addrs is a hack. Setting InSet causes us to
+ // absolutize differences across sections and that is what the MachO writer
+ // uses Addrs for.
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
+ // Record the current value.
Res = Value.getConstant();
- return true;
+
+ return IsRelocatable && Value.isAbsolute();
+}
+
+/// \brief Helper method for \see EvaluateSymbolicAdd().
+static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B,
+ int64_t &Addend) {
+ if (!A || !B)
+ return;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+
+ if (SA.isUndefined() || SB.isUndefined())
+ return;
+
+ if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
+ return;
+
+ MCSymbolData &AD = Asm->getSymbolData(SA);
+ MCSymbolData &BD = Asm->getSymbolData(SB);
+
+ if (AD.getFragment() == BD.getFragment()) {
+ Addend += (AD.getOffset() - BD.getOffset());
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
+ return;
+ }
+
+ if (!Layout)
+ return;
+
+ const MCSectionData &SecA = *AD.getFragment()->getParent();
+ const MCSectionData &SecB = *BD.getFragment()->getParent();
+
+ if ((&SecA != &SecB) && !Addrs)
+ return;
+
+ // Eagerly evaluate.
+ Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) -
+ Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol())));
+ if (Addrs && (&SecA != &SecB))
+ Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+
+ // Clear the symbol expr pointers to indicate we have folded these
+ // operands.
+ A = B = 0;
}
-static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
+/// \brief Evaluate the result of an add between (conceptually) two MCValues.
+///
+/// This routine conceptually attempts to construct an MCValue:
+/// Result = (Result_A - Result_B + Result_Cst)
+/// from two MCValue's LHS and RHS where
+/// Result = LHS + RHS
+/// and
+/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+///
+/// This routine attempts to aggressively fold the operands such that the result
+/// is representable in an MCValue, but may not always succeed.
+///
+/// \returns True on success, false if the result is not representable in an
+/// MCValue.
+
+/// NOTE: It is really important to have both the Asm and Layout arguments.
+/// They might look redundant, but this function can be used before layout
+/// is done (see the object streamer for example) and having the Asm argument
+/// lets us avoid relaxations early.
+static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet,
+ const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
MCValue &Res) {
- // We can't add or subtract two symbols.
- if ((LHS.getSymA() && RHS_A) ||
- (LHS.getSymB() && RHS_B))
+ // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
+ // about dealing with modifiers. This will ultimately bite us, one day.
+ const MCSymbolRefExpr *LHS_A = LHS.getSymA();
+ const MCSymbolRefExpr *LHS_B = LHS.getSymB();
+ int64_t LHS_Cst = LHS.getConstant();
+
+ // Fold the result constant immediately.
+ int64_t Result_Cst = LHS_Cst + RHS_Cst;
+
+ assert((!Layout || Asm) &&
+ "Must have an assembler object if layout is given!");
+
+ // If we have a layout, we can fold resolved differences.
+ if (Asm) {
+ // First, fold out any differences which are fully resolved. By
+ // reassociating terms in
+ // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst).
+ // we have the four possible differences:
+ // (LHS_A - LHS_B),
+ // (LHS_A - RHS_B),
+ // (RHS_A - LHS_B),
+ // (RHS_A - RHS_B).
+ // Since we are attempting to be as aggressive as possible about folding, we
+ // attempt to evaluate each possible alternative.
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B,
+ Result_Cst);
+ AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B,
+ Result_Cst);
+ }
+
+ // We can't represent the addition or subtraction of two symbols.
+ if ((LHS_A && RHS_A) || (LHS_B && RHS_B))
return false;
- const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
- const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
- if (B) {
- // If we have a negated symbol, then we must have also have a non-negated
- // symbol in order to encode the expression. We can do this check later to
- // permit expressions which eventually fold to a representable form -- such
- // as (a + (0 - b)) -- if necessary.
- if (!A)
- return false;
- }
- Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst);
+ // At this point, we have at most one additive symbol and one subtractive
+ // symbol -- find them.
+ const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
+ const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
+
+ // If we have a negated symbol, then we must also have a non-negated
+ // symbol in order to encode the expression.
+ if (B && !A)
+ return false;
+
+ Res = MCValue::get(A, B, Result_Cst);
return true;
}
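
A toy model of the folding that AttemptToFoldSymbolOffsetDifference performs and that EvaluateSymbolicAdd relies on: when both symbols of a difference are defined in the same fragment, (A - B) collapses to a constant even before layout, leaving at most one additive and one subtractive symbol in the result. Names and offsets are made up.

#include <cstdio>
#include <optional>
#include <string>

struct Sym {
  std::string Name;
  int Fragment;   // which fragment the symbol is defined in
  int Offset;     // offset within that fragment
};

std::optional<long> foldDifference(const Sym &A, const Sym &B) {
  if (A.Fragment != B.Fragment)
    return std::nullopt;            // would need layout (or Addrs) to resolve
  return long(A.Offset) - long(B.Offset);
}

int main() {
  Sym L1{"L1", /*Fragment=*/0, /*Offset=*/10};
  Sym L2{"L2", /*Fragment=*/0, /*Offset=*/30};
  long Cst = 4;
  if (auto D = foldDifference(L2, L1))
    std::printf("(L2 - L1) + %ld = %ld\n", Cst, *D + Cst);  // prints 24
}
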
bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout &Layout) const {
+ return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout,
+ 0, false);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAssembler *Asm,
+ const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
++stats::MCExprEvaluate;
switch (getKind()) {
@@ -258,26 +440,15 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCSymbol &Sym = SRE->getSymbol();
// Evaluate recursively if this is a variable.
- if (Sym.isVariable()) {
- if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout))
- return false;
-
- // Absolutize symbol differences between defined symbols when we have a
- // layout object and the target requests it.
- if (Layout && Res.getSymB() &&
- Layout->getAssembler().getBackend().hasAbsolutizedSet() &&
- Res.getSymA()->getSymbol().isDefined() &&
- Res.getSymB()->getSymbol().isDefined()) {
- MCSymbolData &A =
- Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol());
- MCSymbolData &B =
- Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol());
- Res = MCValue::get(+ Layout->getSymbolAddress(&A)
- - Layout->getSymbolAddress(&B)
- + Res.getConstant());
- }
-
- return true;
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm,
+ Layout,
+ Addrs,
+ true);
+ // If we failed to simplify this to a constant, let the target
+ // handle it.
+ if (Ret && !Res.getSymA() && !Res.getSymB())
+ return true;
}
Res = MCValue::get(SRE, 0, 0);
@@ -288,7 +459,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
+ Addrs, InSet))
return false;
switch (AUE->getOpcode()) {
@@ -321,8 +493,10 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) ||
- !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout))
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
+ Addrs, InSet))
return false;
// We only support a few operations on non-constant expressions, handle
@@ -333,13 +507,13 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
return false;
case MCBinaryExpr::Sub:
// Negate RHS and add.
- return EvaluateSymbolicAdd(LHSValue,
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymB(), RHSValue.getSymA(),
-RHSValue.getConstant(),
Res);
case MCBinaryExpr::Add:
- return EvaluateSymbolicAdd(LHSValue,
+ return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymA(), RHSValue.getSymB(),
RHSValue.getConstant(),
Res);
diff --git a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
index b96040a..012c7f6 100644
--- a/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCLoggingStreamer.cpp
@@ -48,10 +48,14 @@ public:
return Child->AddBlankLine();
}
- virtual void SwitchSection(const MCSection *Section) {
- CurSection = Section;
- LogCall("SwitchSection");
- return Child->SwitchSection(Section);
+ virtual void ChangeSection(const MCSection *Section) {
+ LogCall("ChangeSection");
+ return Child->ChangeSection(Section);
+ }
+
+ virtual void InitSections() {
+ LogCall("InitSections");
+ return Child->InitSections();
}
virtual void EmitLabel(MCSymbol *Symbol) {
@@ -64,11 +68,28 @@ public:
return Child->EmitAssemblerFlag(Flag);
}
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ LogCall("EmitThumbFunc");
+ return Child->EmitThumbFunc(Func);
+ }
+
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
LogCall("EmitAssignment");
return Child->EmitAssignment(Symbol, Value);
}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
+ LogCall("EmitWeakReference");
+ return Child->EmitWeakReference(Alias, Symbol);
+ }
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ LogCall("EmitDwarfAdvanceLineAddr");
+ return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+ }
+
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
LogCall("EmitSymbolAttribute");
return Child->EmitSymbolAttribute(Symbol, Attribute);
@@ -132,14 +153,22 @@ public:
return Child->EmitBytes(Data, AddrSpace);
}
- virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace){
LogCall("EmitValue");
- return Child->EmitValue(Value, Size, AddrSpace);
+ return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace);
+ }
+
+ virtual void EmitULEB128Value(const MCExpr *Value,
+ unsigned AddrSpace = 0) {
+ LogCall("EmitULEB128Value");
+ return Child->EmitULEB128Value(Value, AddrSpace);
}
- virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) {
- LogCall("EmitIntValue");
- return Child->EmitIntValue(Value, Size, AddrSpace);
+ virtual void EmitSLEB128Value(const MCExpr *Value,
+ unsigned AddrSpace = 0) {
+ LogCall("EmitSLEB128Value");
+ return Child->EmitSLEB128Value(Value, AddrSpace);
}
virtual void EmitGPRel32Value(const MCExpr *Value) {
@@ -178,12 +207,23 @@ public:
return Child->EmitFileDirective(Filename);
}
- virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
LogCall("EmitDwarfFileDirective",
"FileNo:" + Twine(FileNo) + " Filename:" + Filename);
return Child->EmitDwarfFileDirective(FileNo, Filename);
}
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator) {
+ LogCall("EmitDwarfLocDirective",
+ "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
+ " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
+ " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
+ return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
+ Isa, Discriminator);
+ }
+
virtual void EmitInstruction(const MCInst &Inst) {
LogCall("EmitInstruction");
return Child->EmitInstruction(Inst);
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index 671874d..d1f9f5c 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -20,9 +20,11 @@
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
@@ -30,13 +32,7 @@ namespace {
class MCMachOStreamer : public MCObjectStreamer {
private:
- void EmitInstToFragment(const MCInst &Inst);
- void EmitInstToData(const MCInst &Inst);
- // FIXME: These will likely moved to a better place.
- void MakeLineEntryForSection(const MCSection *Section);
- const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End,
- int IntVal);
- void EmitDwarfFileTable(void);
+ virtual void EmitInstToData(const MCInst &Inst);
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -46,8 +42,10 @@ public:
/// @name MCStreamer Interface
/// @{
+ virtual void InitSections();
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
@@ -76,17 +74,11 @@ public:
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment = 0);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
- virtual void EmitGPRel32Value(const MCExpr *Value) {
- assert(0 && "macho doesn't support this directive");
- }
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename) {
// FIXME: Just ignore the .file; it isn't important enough to fail the
@@ -94,14 +86,6 @@ public:
//report_fatal_error("unsupported directive: '.file'");
}
- virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- // FIXME: Just ignore the .file; it isn't important enough to fail the
- // entire assembly.
-
- //report_fatal_error("unsupported directive: '.file'");
- }
-
- virtual void EmitInstruction(const MCInst &Inst);
virtual void Finish();
@@ -110,31 +94,26 @@ public:
} // end anonymous namespace.
-void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
- // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging
- // into MCObjectStreamer.
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(CurSection && "Cannot emit before setting section!");
+void MCMachOStreamer::InitSections() {
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
- Symbol->setSection(*CurSection);
+}
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ // isSymbolLinkerVisible uses the section.
+ Symbol->setSection(*getCurrentSection());
// We have to create a new fragment if this is an atom defining symbol,
// fragments cannot span atoms.
- if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ if (getAssembler().isSymbolLinkerVisible(*Symbol))
new MCDataFragment(getCurrentSectionData());
- // FIXME: This is wasteful, we don't necessarily need to create a data
- // fragment. Instead, we should mark the symbol as pointing into the data
- // fragment if it exists, otherwise we should just queue the label and set its
- // fragment pointer when we emit the next fragment.
- MCDataFragment *F = getOrCreateDataFragment();
- assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
- SD.setFragment(F);
- SD.setOffset(F->getContents().size());
+ MCObjectStreamer::EmitLabel(Symbol);
+ MCSymbolData &SD = getAssembler().getSymbolData(*Symbol);
// This causes the reference type flag to be cleared. Darwin 'as' was "trying"
// to clear the weak reference and weak definition bits too, but the
// implementation was buggy. For now we just try to match 'as', for
@@ -146,13 +125,31 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
}
void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().HandleAssemblerFlag(Flag);
+ // Do any generic stuff we need to do.
switch (Flag) {
+ case MCAF_SyntaxUnified: return; // no-op here.
+ case MCAF_Code16: return; // no-op here.
+ case MCAF_Code32: return; // no-op here.
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
+ default:
+ llvm_unreachable("invalid assembler flag!");
}
+}
+
+void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
- assert(0 && "invalid assembler flag!");
+ // Remember that the function is a thumb function. Fixup and relocation
+ // values will need to be adjusted.
+ getAssembler().setIsThumbFunc(Symbol);
+
+ // Mark the thumb bit on the symbol.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setFlags(SD.getFlags() | SF_ThumbFunc);
}
void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -196,6 +193,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_ELF_TypeTLS:
case MCSA_ELF_TypeCommon:
case MCSA_ELF_TypeNoType:
+ case MCSA_ELF_TypeGnuUniqueObject:
case MCSA_IndirectSymbol:
case MCSA_Hidden:
case MCSA_Internal:
@@ -230,6 +228,10 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
break;
+ case MCSA_SymbolResolver:
+ SD.setFlags(SD.getFlags() | SF_SymbolResolver);
+ break;
+
case MCSA_PrivateExtern:
SD.setExternal(true);
SD.setPrivateExtern(true);
@@ -313,26 +315,6 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
-void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
- // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
- // MCObjectStreamer.
- MCDataFragment *DF = getOrCreateDataFragment();
-
- // Avoid fixups when possible.
- int64_t AbsValue;
- if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
- // FIXME: Endianness assumption.
- for (unsigned i = 0; i != Size; ++i)
- DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
- } else {
- DF->addFixup(MCFixup::Create(DF->getContents().size(),
- AddValueSymbols(Value),
- MCFixup::getKindForSize(Size)));
- DF->getContents().resize(DF->getContents().size() + Size, 0);
- }
-}
-
void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit) {
@@ -363,28 +345,6 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
-void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
- new MCOrgFragment(*Offset, Value, getCurrentSectionData());
-}
-
-void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
-
- // Add the fixups and data.
- //
- // FIXME: Revisit this design decision when relaxation is done, we may be
- // able to get away with not storing any extra data in the MCInst.
- SmallVector<MCFixup, 4> Fixups;
- SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
- VecOS.flush();
-
- IF->getCode() = Code;
- IF->getFixups() = Fixups;
-}
-
void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
MCDataFragment *DF = getOrCreateDataFragment();
@@ -402,240 +362,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
- // Scan for values.
- for (unsigned i = Inst.getNumOperands(); i--; )
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
-
- getCurrentSectionData()->setHasInstructions(true);
-
- // Now that a machine instruction has been assembled into this section, make
- // a line entry for any .loc directive that has been seen.
- MakeLineEntryForSection(getCurrentSection());
-
- // If this instruction doesn't need relaxation, just emit it as data.
- if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
- EmitInstToData(Inst);
- return;
- }
-
- // Otherwise, if we are relaxing everything, relax the instruction as much as
- // possible and emit it as data.
- if (getAssembler().getRelaxAll()) {
- MCInst Relaxed;
- getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
- while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
- getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
- EmitInstToData(Relaxed);
- return;
- }
-
- // Otherwise emit to a separate fragment.
- EmitInstToFragment(Inst);
-}
-
-//
-// This is called when an instruction is assembled into the specified section
-// and if there is information from the last .loc directive that has yet to have
-// a line entry made for it is made.
-//
-void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) {
- if (!getContext().getDwarfLocSeen())
- return;
-
- // Create a symbol at in the current section for use in the line entry.
- MCSymbol *LineSym = getContext().CreateTempSymbol();
- // Set the value of the symbol to use for the MCLineEntry.
- EmitLabel(LineSym);
-
- // Get the current .loc info saved in the context.
- const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc();
-
- // Create a (local) line entry with the symbol and the current .loc info.
- MCLineEntry LineEntry(LineSym, DwarfLoc);
-
- // clear DwarfLocSeen saying the current .loc info is now used.
- getContext().clearDwarfLocSeen();
-
- // Get the MCLineSection for this section, if one does not exist for this
- // section create it.
- DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
- getContext().getMCLineSections();
- MCLineSection *LineSection = MCLineSections[Section];
- if (!LineSection) {
- // Create a new MCLineSection. This will be deleted after the dwarf line
- // table is created using it by iterating through the MCLineSections
- // DenseMap.
- LineSection = new MCLineSection;
- // Save a pointer to the new LineSection into the MCLineSections DenseMap.
- MCLineSections[Section] = LineSection;
- }
-
- // Add the line entry to this section's entries.
- LineSection->addLineEntry(LineEntry);
-}
-
-//
-// This helper routine returns an expression of End - Start + IntVal for use
-// by EmitDwarfFileTable() below.
-//
-const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start,
- MCSymbol *End,
- int IntVal) {
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *Res =
- MCSymbolRefExpr::Create(End, Variant, getContext());
- const MCExpr *RHS =
- MCSymbolRefExpr::Create(Start, Variant, getContext());
- const MCExpr *Res1 =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS,getContext());
- const MCExpr *Res2 =
- MCConstantExpr::Create(IntVal, getContext());
- const MCExpr *Res3 =
- MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext());
- return Res3;
-}
-
-//
-// This emits the Dwarf file (and eventually the line) table.
-//
-void MCMachOStreamer::EmitDwarfFileTable(void) {
- // For now make sure we don't put out the Dwarf file table if no .file
- // directives were seen.
- const std::vector<MCDwarfFile *> &MCDwarfFiles =
- getContext().getMCDwarfFiles();
- if (MCDwarfFiles.size() == 0)
- return;
-
- // This is the Mach-O section, for ELF it is the .debug_line section.
- SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line",
- MCSectionMachO::S_ATTR_DEBUG,
- 0, SectionKind::getDataRelLocal()));
-
- // Create a symbol at the beginning of this section.
- MCSymbol *LineStartSym = getContext().CreateTempSymbol();
- // Set the value of the symbol, as we are at the start of the section.
- EmitLabel(LineStartSym);
-
- // Create a symbol for the end of the section (to be set when we get there).
- MCSymbol *LineEndSym = getContext().CreateTempSymbol();
-
- // The first 4 bytes is the total length of the information for this
- // compilation unit (not including these 4 bytes for the length).
- EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0);
-
- // Next 2 bytes is the Version, which is Dwarf 2.
- EmitIntValue(2, 2);
-
- // Create a symbol for the end of the prologue (to be set when we get there).
- MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end
-
- // Length of the prologue, is the next 4 bytes. Which is the start of the
- // section to the end of the prologue. Not including the 4 bytes for the
- // total length, the 2 bytes for the version, and these 4 bytes for the
- // length of the prologue.
- EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0);
-
- // Parameters of the state machine, are next.
- // Define the architecture-dependent minimum instruction length (in
- // bytes). This value should be rather too small than too big. */
- // DWARF2_LINE_MIN_INSN_LENGTH
- EmitIntValue(1, 1);
- // Flag that indicates the initial value of the is_stmt_start flag.
- // DWARF2_LINE_DEFAULT_IS_STMT
- EmitIntValue(1, 1);
- // Minimum line offset in a special line info. opcode. This value
- // was chosen to give a reasonable range of values. */
- // DWARF2_LINE_BASE
- EmitIntValue(uint64_t(-5), 1);
- // Range of line offsets in a special line info. opcode.
- // DWARF2_LINE_RANGE
- EmitIntValue(14, 1);
- // First special line opcode - leave room for the standard opcodes.
- // DWARF2_LINE_OPCODE_BASE
- EmitIntValue(13, 1);
-
- // Standard opcode lengths
- EmitIntValue(0, 1); // length of DW_LNS_copy
- EmitIntValue(1, 1); // length of DW_LNS_advance_pc
- EmitIntValue(1, 1); // length of DW_LNS_advance_line
- EmitIntValue(1, 1); // length of DW_LNS_set_file
- EmitIntValue(1, 1); // length of DW_LNS_set_column
- EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
- EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
- EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
- EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
- EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
- EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
- EmitIntValue(1, 1); // DW_LNS_set_isa
-
- // Put out the directory and file tables.
-
- // First the directory table.
- const std::vector<StringRef> &MCDwarfDirs =
- getContext().getMCDwarfDirs();
- for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
- EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
- EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
- }
- EmitIntValue(0, 1); // Terminate the directory list
-
- // Second the file table.
- for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
- EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
- EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
- // FIXME the Directory number should be a .uleb128 not a .byte
- EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1);
- EmitIntValue(0, 1); // last modification timestamp (always 0)
- EmitIntValue(0, 1); // filesize (always 0)
- }
- EmitIntValue(0, 1); // Terminate the file list
-
- // This is the end of the prologue, so set the value of the symbol at the
- // end of the prologue (that was used in a previous expression).
- EmitLabel(ProEndSym);
-
- // TODO: This is the point where the line tables would be emitted.
-
- // Delete the MCLineSections that were created in
- // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line
- // tables.
- DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
- getContext().getMCLineSections();
- for (DenseMap<const MCSection *, MCLineSection *>::iterator it =
- MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) {
- delete it->second;
- }
-
- // If there are no line tables emited then we emit:
- // The following DW_LNE_set_address sequence to set the address to zero
- // TODO test for 32-bit or 64-bit output
- // This is the sequence for 32-bit code
- EmitIntValue(0, 1);
- EmitIntValue(5, 1);
- EmitIntValue(2, 1);
- EmitIntValue(0, 1);
- EmitIntValue(0, 1);
- EmitIntValue(0, 1);
- EmitIntValue(0, 1);
-
- // Lastly emit the DW_LNE_end_sequence which consists of 3 bytes '00 01 01'
- // (00 is the code for extended opcodes, followed by a ULEB128 length of the
- // extended opcode (01), and the DW_LNE_end_sequence (01).
- EmitIntValue(0, 1); // DW_LNS_extended_op
- EmitIntValue(1, 1); // ULEB128 length of the extended opcode
- EmitIntValue(1, 1); // DW_LNE_end_sequence
-
- // This is the end of the section, so set the value of the symbol at the end
- // of this section (that was used in a previous expression).
- EmitLabel(LineEndSym);
-}
-
void MCMachOStreamer::Finish() {
- // Dump out the dwarf file and directory tables (soon to include line table)
- EmitDwarfFileTable();
-
// We have to set the fragment atom associations so we can relax properly for
// Mach-O.
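
The DWARF file and line emission deleted above is not lost; per the MCObjectStreamer::Finish change later in this patch it moves behind MCDwarfFileTable::Emit. The prologue constants the removed code wrote (minimum instruction length 1, default is_stmt 1, line_base -5, line_range 14, opcode_base 13) are the standard DWARF 2 line-program parameters. As a quick illustration of how they are used, the sketch below (plain C++, not LLVM code) computes a DWARF "special opcode" for a given line delta and address advance using the formula from the DWARF specification.

#include <cstdio>

// Returns the DWARF special opcode for a (line delta, address advance) pair,
// or -1 if the pair cannot be encoded in a single special opcode.
static int SpecialOpcode(int LineDelta, int AddrAdvance) {
  const int LineBase = -5, LineRange = 14, OpcodeBase = 13;
  if (LineDelta < LineBase || LineDelta >= LineBase + LineRange)
    return -1;
  int Opcode = (LineDelta - LineBase) + LineRange * AddrAdvance + OpcodeBase;
  return Opcode <= 255 ? Opcode : -1;
}

int main() {
  std::printf("%d\n", SpecialOpcode(2, 3));   // (2+5) + 14*3 + 13 = 62
}
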
diff --git a/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
new file mode 100644
index 0000000..146cebf
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCMachObjectTargetWriter.cpp
@@ -0,0 +1,22 @@
+//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCMachObjectWriter.h"
+
+using namespace llvm;
+
+MCMachObjectTargetWriter::MCMachObjectTargetWriter(
+ bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_,
+ bool UseAggressiveSymbolFolding_)
+ : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_),
+ UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) {
+}
+
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {
+}
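
The new file above only provides an out-of-line constructor and destructor. A plausible reading (an assumption, not stated in the patch) is the usual "key function" idiom: defining one virtual member out of line gives the class a home translation unit for its vtable instead of emitting it in every user of the header. A minimal standalone sketch:

// Illustrative only; names are not LLVM's.
struct TargetWriterBase {
  explicit TargetWriterBase(bool Is64Bit_) : Is64Bit(Is64Bit_) {}
  virtual ~TargetWriterBase();              // declared in the header ...
  bool Is64Bit;
};

TargetWriterBase::~TargetWriterBase() {}    // ... defined once, in one .cpp file

int main() {
  TargetWriterBase W(true);
  return W.Is64Bit ? 0 : 1;
}
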
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index f7a2f20..08ddf01 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -25,20 +25,26 @@ namespace {
/// @name MCStreamer Interface
/// @{
- virtual void SwitchSection(const MCSection *Section) {
- PrevSection = CurSection;
- CurSection = Section;
+ virtual void InitSections() {
+ }
+
+ virtual void ChangeSection(const MCSection *Section) {
}
virtual void EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(CurSection && "Cannot emit before setting section!");
- Symbol->setSection(*CurSection);
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection());
}
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label) {}
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
@@ -60,8 +66,12 @@ namespace {
uint64_t Size, unsigned ByteAlignment) {}
virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
- virtual void EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {}
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace) {}
+ virtual void EmitULEB128Value(const MCExpr *Value,
+ unsigned AddrSpace = 0) {}
+ virtual void EmitSLEB128Value(const MCExpr *Value,
+ unsigned AddrSpace = 0) {}
virtual void EmitGPRel32Value(const MCExpr *Value) {}
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
@@ -74,7 +84,12 @@ namespace {
unsigned char Value = 0) {}
virtual void EmitFileDirective(StringRef Filename) {}
- virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {}
+ virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {
+ return false;
+ }
+ virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa, unsigned Discriminator) {}
virtual void EmitInstruction(const MCInst &Inst) {}
virtual void Finish() {}
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 2b2385e..0358266 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -7,19 +7,26 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter)
- : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB,
- *_Emitter, _OS)),
+ raw_ostream &OS, MCCodeEmitter *Emitter_)
+ : MCStreamer(Context),
+ Assembler(new MCAssembler(Context, TAB,
+ *Emitter_, *TAB.createObjectWriter(OS),
+ OS)),
CurSectionData(0)
{
}
@@ -27,6 +34,7 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
delete &Assembler->getEmitter();
+ delete &Assembler->getWriter();
delete Assembler;
}
@@ -48,7 +56,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
switch (Value->getKind()) {
- case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Target:
+ cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+ break;
+
case MCExpr::Constant:
break;
@@ -71,17 +82,173 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
return Value;
}
-void MCObjectStreamer::SwitchSection(const MCSection *Section) {
- assert(Section && "Cannot switch to a null section!");
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ EmitIntValue(AbsValue, Size, AddrSpace);
+ return;
+ }
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ Value,
+ MCFixup::getKindForSize(Size, isPCRel)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ Symbol->setSection(*getCurrentSection());
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value,
+ unsigned AddrSpace) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitULEB128IntValue(IntValue, AddrSpace);
+ return;
+ }
+ new MCLEBFragment(*Value, false, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value,
+ unsigned AddrSpace) {
+ int64_t IntValue;
+ if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
+ EmitSLEB128IntValue(IntValue, AddrSpace);
+ return;
+ }
+ new MCLEBFragment(*Value, true, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ report_fatal_error("This file format doesn't support weak aliases.");
+}
- // If already in this section, then this is a noop.
- if (Section == CurSection) return;
+void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
- PrevSection = CurSection;
- CurSection = Section;
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
}
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MCLineEntry::Make(this, getCurrentSection());
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (getAssembler().getRelaxAll()) {
+ MCInst Relaxed;
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ raw_svector_ostream VecOS(IF->getCode());
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups());
+}
+
+static const MCExpr *BuildSymbolDiff(MCContext &Context,
+ const MCSymbol *A, const MCSymbol *B) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *ARef =
+ MCSymbolRefExpr::Create(A, Variant, Context);
+ const MCExpr *BRef =
+ MCSymbolRefExpr::Create(B, Variant, Context);
+ const MCExpr *AddrDelta =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ return AddrDelta;
+}
+
+static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer,
+ MCContext &Context, const MCExpr* Expr) {
+ if (Context.getAsmInfo().hasAggressiveSymbolFolding())
+ return Expr;
+
+ MCSymbol *ABS = Context.CreateTempSymbol();
+ Streamer->EmitAssignment(ABS, Expr);
+ return MCSymbolRefExpr::Create(ABS, Context);
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ if (!LastLabel) {
+ int PointerSize = getContext().getTargetAsmInfo().getPointerSize();
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
+ return;
+ }
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfLineAddr::Emit(this, LineDelta, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(this, getContext(), AddrDelta);
+ new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
+ int64_t Res;
+ if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) {
+ MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
+ return;
+ }
+ AddrDelta = ForceExpAbs(this, getContext(), AddrDelta);
+ new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+}
+
+void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
void MCObjectStreamer::Finish() {
+ // Dump out the dwarf file & directory tables and line tables.
+ if (getContext().hasDwarfFiles())
+ MCDwarfFileTable::Emit(this);
+
getAssembler().Finish();
}
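
EmitDwarfAdvanceLineAddr and EmitDwarfAdvanceFrameAddr above follow the same pattern: build "Label - LastLabel" as an expression, emit the advance immediately if it already folds to a constant, and otherwise hand the unresolved expression to a fragment so layout can finish the job. The standalone sketch below shows that decision in isolation; FoldDelta is an invented stand-in for MCExpr::EvaluateAsAbsolute, not an LLVM API.

#include <cstdint>
#include <cstdio>
#include <optional>

// The difference folds only when both labels already have known offsets.
static std::optional<int64_t> FoldDelta(std::optional<uint64_t> LabelOff,
                                        std::optional<uint64_t> LastOff) {
  if (LabelOff && LastOff)
    return (int64_t)*LabelOff - (int64_t)*LastOff;
  return std::nullopt;   // unresolved: would become an MCDwarfLineAddrFragment
}

int main() {
  if (auto D = FoldDelta(0x30, 0x10))                  // both offsets known
    std::printf("emit advance of %lld now\n", (long long)*D);
  if (!FoldDelta(std::nullopt, 0x10))                  // label not laid out yet
    std::printf("defer to a fragment for layout to resolve\n");
}
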
diff --git a/contrib/llvm/lib/MC/MCObjectWriter.cpp b/contrib/llvm/lib/MC/MCObjectWriter.cpp
index d117e82..efe9f68 100644
--- a/contrib/llvm/lib/MC/MCObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MCObjectWriter.cpp
@@ -7,9 +7,74 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
using namespace llvm;
MCObjectWriter::~MCObjectWriter() {
}
+
+/// Utility function to encode a SLEB128 value.
+void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
+ bool More;
+ do {
+ uint8_t Byte = Value & 0x7f;
+ // NOTE: this assumes that this signed shift is an arithmetic right shift.
+ Value >>= 7;
+ More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (More);
+}
+
+/// Utility function to encode a ULEB128 value.
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to indicate that more bytes will follow.
+ OS << char(Byte);
+ } while (Value != 0);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+ const MCSymbolRefExpr *A,
+ const MCSymbolRefExpr *B,
+ bool InSet) const {
+ // Modified symbol references cannot be resolved.
+ if (A->getKind() != MCSymbolRefExpr::VK_None ||
+ B->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbol &SA = A->getSymbol();
+ const MCSymbol &SB = B->getSymbol();
+ if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+ return false;
+
+ const MCSymbolData &DataA = Asm.getSymbolData(SA);
+ const MCSymbolData &DataB = Asm.getSymbolData(SB);
+
+ return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+ *DataB.getFragment(),
+ InSet,
+ false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+ // On ELF and COFF A - B is absolute if A and B are in the same section.
+ return &SecA == &SecB;
+}
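
The two encoders added above implement standard LEB128: seven value bits per byte, least-significant group first, with the top bit of each byte flagging that more bytes follow. Below is a standalone copy of the unsigned encoder together with the classic worked example from the DWARF specification (624485 encodes as e5 8e 26); it is a sketch for illustration, not part of the patch.

#include <cstdint>
#include <cstdio>

// Same algorithm as the EncodeULEB128 added above.
static unsigned EncodeULEB128(uint64_t Value, unsigned char *Out) {
  unsigned N = 0;
  do {
    unsigned char Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                    // more bytes follow
    Out[N++] = Byte;
  } while (Value != 0);
  return N;
}

int main() {
  unsigned char Buf[10];
  unsigned N = EncodeULEB128(624485, Buf);   // worked example from the DWARF spec
  for (unsigned i = 0; i != N; ++i)
    std::printf("%02x ", Buf[i]);            // prints: e5 8e 26
  std::printf("\n");
}
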
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index 086df08..89374d0 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/MC/MCAsmInfo.h"
+#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
@@ -30,12 +31,12 @@ AsmLexer::~AsmLexer() {
void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
CurBuf = buf;
-
+
if (ptr)
CurPtr = ptr;
else
CurPtr = CurBuf->getBufferStart();
-
+
TokStart = 0;
}
@@ -43,7 +44,7 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
/// location. This is defined to always return AsmToken::Error.
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
SetError(SMLoc::getFromPointer(Loc), Msg);
-
+
return AsmToken(AsmToken::Error, StringRef(Loc, 0));
}
@@ -57,23 +58,59 @@ int AsmLexer::getNextChar() {
// a random nul in the file. Disambiguate that here.
if (CurPtr-1 != CurBuf->getBufferEnd())
return 0; // Just whitespace.
-
+
// Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
+ --CurPtr; // Another call to lex will return EOF again.
return EOF;
}
}
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
+AsmToken AsmLexer::LexFloatLiteral() {
+ // Skip the fractional digit sequence.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+
+ // Check for exponent; we intentionally accept a slighlty wider set of
+ // literals here and rely on the upstream client to reject invalid ones (e.g.,
+ // "1e+").
+ if (*CurPtr == 'e' || *CurPtr == 'E') {
+ ++CurPtr;
+ if (*CurPtr == '-' || *CurPtr == '+')
+ ++CurPtr;
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ }
+
+ return AsmToken(AsmToken::Real,
+ StringRef(TokStart, CurPtr - TokStart));
+}
+
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+ return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
AsmToken AsmLexer::LexIdentifier() {
- while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
- *CurPtr == '.' || *CurPtr == '@')
+ // Check for floating point literals.
+ if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+ // Disambiguate a .1243foo identifier from a floating literal.
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+ return LexFloatLiteral();
+ }
+
+ while (IsIdentifierChar(*CurPtr))
++CurPtr;
-
+
// Handle . as a special case.
if (CurPtr == TokStart+1 && TokStart[0] == '.')
return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
-
+
return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
@@ -83,7 +120,7 @@ AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*': break; // C style comment.
case '/': return ++CurPtr, LexLineComment();
- default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
}
// C Style comment.
@@ -96,7 +133,7 @@ AsmToken AsmLexer::LexSlash() {
case '*':
// End of the comment?
if (CurPtr[0] != '/') break;
-
+
++CurPtr; // End the */.
return LexToken();
}
@@ -111,7 +148,7 @@ AsmToken AsmLexer::LexLineComment() {
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
-
+
if (CurChar == EOF)
return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
@@ -124,7 +161,6 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
CurPtr += 3;
}
-
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
/// Forward/Backward Label: [0-9][fb]
@@ -132,32 +168,37 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
/// Octal integer: 0[0-7]+
/// Hex integer: 0x[0-9a-fA-F]+
/// Decimal integer: [1-9][0-9]*
-/// TODO: FP literal.
AsmToken AsmLexer::LexDigit() {
// Decimal integer: [1-9][0-9]*
- if (CurPtr[-1] != '0') {
+ if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
while (isdigit(*CurPtr))
++CurPtr;
-
+
+ // Check for floating point literals.
+ if (*CurPtr == '.' || *CurPtr == 'e') {
+ ++CurPtr;
+ return LexFloatLiteral();
+ }
+
StringRef Result(TokStart, CurPtr - TokStart);
long long Value;
if (Result.getAsInteger(10, Value)) {
- // We have to handle minint_as_a_positive_value specially, because
- // - minint_as_a_positive_value = minint and it is valid.
- if (Result == "9223372036854775808")
- Value = -9223372036854775808ULL;
- else
- return ReturnError(TokStart, "Invalid decimal number");
+ // Allow positive values that are too large to fit into a signed 64-bit
+ // integer, but that do fit in an unsigned one; we just convert them over.
+ unsigned long long UValue;
+ if (Result.getAsInteger(10, UValue))
+ return ReturnError(TokStart, "invalid decimal number");
+ Value = (long long)UValue;
}
-
+
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
-
+
return AsmToken(AsmToken::Integer, Result, Value);
}
-
+
if (*CurPtr == 'b') {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
@@ -169,30 +210,30 @@ AsmToken AsmLexer::LexDigit() {
const char *NumStart = CurPtr;
while (CurPtr[0] == '0' || CurPtr[0] == '1')
++CurPtr;
-
+
// Requires at least one binary digit.
if (CurPtr == NumStart)
return ReturnError(TokStart, "Invalid binary number");
-
+
StringRef Result(TokStart, CurPtr - TokStart);
-
+
long long Value;
if (Result.substr(2).getAsInteger(2, Value))
return ReturnError(TokStart, "Invalid binary number");
-
+
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
-
+
return AsmToken(AsmToken::Integer, Result, Value);
}
-
+
if (*CurPtr == 'x') {
++CurPtr;
const char *NumStart = CurPtr;
while (isxdigit(CurPtr[0]))
++CurPtr;
-
+
// Requires at least one hex digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid hexadecimal number");
@@ -200,31 +241,67 @@ AsmToken AsmLexer::LexDigit() {
unsigned long long Result;
if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
return ReturnError(TokStart, "Invalid hexadecimal number");
-
+
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
-
+
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
(int64_t)Result);
}
-
+
// Must be an octal number, it starts with 0.
while (*CurPtr >= '0' && *CurPtr <= '7')
++CurPtr;
-
+
StringRef Result(TokStart, CurPtr - TokStart);
long long Value;
if (Result.getAsInteger(8, Value))
return ReturnError(TokStart, "Invalid octal number");
-
+
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
// suffixes on integer literals.
SkipIgnoredIntegerSuffix(CurPtr);
-
+
return AsmToken(AsmToken::Integer, Result, Value);
}
+/// LexSingleQuote: Integer: 'b'
+AsmToken AsmLexer::LexSingleQuote() {
+ int CurChar = getNextChar();
+
+ if (CurChar == '\\')
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated single quote");
+
+ CurChar = getNextChar();
+
+ if (CurChar != '\'')
+ return ReturnError(TokStart, "single quote way too long");
+
+ // The idea here being that 'c' is basically just an integral
+ // constant.
+ StringRef Res = StringRef(TokStart,CurPtr - TokStart);
+ long long Value;
+
+ if (Res.startswith("\'\\")) {
+ char theChar = Res[2];
+ switch (theChar) {
+ default: Value = theChar; break;
+ case '\'': Value = '\''; break;
+ case 't': Value = '\t'; break;
+ case 'n': Value = '\n'; break;
+ case 'b': Value = '\b'; break;
+ }
+ } else
+ Value = TokStart[1];
+
+ return AsmToken(AsmToken::Integer, Res, Value);
+}
+
+
/// LexQuote: String: "..."
AsmToken AsmLexer::LexQuote() {
int CurChar = getNextChar();
@@ -234,13 +311,13 @@ AsmToken AsmLexer::LexQuote() {
// Allow \", etc.
CurChar = getNextChar();
}
-
+
if (CurChar == EOF)
return ReturnError(TokStart, "unterminated string constant");
CurChar = getNextChar();
}
-
+
return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
}
@@ -266,7 +343,7 @@ AsmToken AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
-
+
if (isAtStartOfComment(CurChar))
return LexLineComment();
@@ -275,7 +352,7 @@ AsmToken AsmLexer::LexToken() {
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
return LexIdentifier();
-
+
// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
@@ -301,49 +378,50 @@ AsmToken AsmLexer::LexToken() {
case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
- case '=':
+ case '=':
if (*CurPtr == '=')
return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
- case '|':
+ case '|':
if (*CurPtr == '|')
return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
- case '&':
+ case '&':
if (*CurPtr == '&')
return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
- case '!':
+ case '!':
if (*CurPtr == '=')
return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
case '/': return LexSlash();
case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '\'': return LexSingleQuote();
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return LexDigit();
case '<':
switch (*CurPtr) {
- case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
StringRef(TokStart, 2));
- case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
StringRef(TokStart, 2));
- case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
StringRef(TokStart, 2));
default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
}
case '>':
switch (*CurPtr) {
- case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
StringRef(TokStart, 2));
- case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
StringRef(TokStart, 2));
default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
}
-
+
// TODO: Quoted identifiers (objc methods etc)
// local labels: [0-9][:]
// Forward/backward labels: [0-9][fb]
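
The lexer changes above add two new token forms: floating-point literals (LexFloatLiteral/LexDigit) and 'c'-style character constants (LexSingleQuote). On the parser side (next file), an AsmToken::Real is turned into an integer constant by bit-casting the IEEE double, so a real literal used in an ordinary expression context contributes its raw bit pattern. A small standalone illustration (not LLVM code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 1.5;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);               // same effect as bitcastToAPInt()
  std::printf("%#llx\n", (unsigned long long)Bits);  // 0x3ff8000000000000
}
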
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index f83cd5e..c6d0da6 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
@@ -18,7 +19,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -27,11 +27,12 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetAsmParser.h"
+#include <cctype>
#include <vector>
using namespace llvm;
@@ -102,6 +103,9 @@ private:
/// Boolean tracking whether macro substitution is enabled.
unsigned MacrosEnabled : 1;
+ /// Flag tracking whether any errors have been encountered.
+ unsigned HadError : 1;
+
public:
AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@@ -137,14 +141,18 @@ public:
/// }
private:
+ void CheckForValidSection();
+
bool ParseStatement();
bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
void HandleMacroExit();
void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
-
+ void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type);
+ }
+
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
@@ -160,22 +168,27 @@ private:
/// will be either the EndOfStatement or EOF.
StringRef ParseStringToEndOfStatement();
- bool ParseAssignment(StringRef Name);
+ bool ParseAssignment(StringRef Name, bool allow_redef);
bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
/// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
/// and set \arg Res to the identifier contents.
bool ParseIdentifier(StringRef &Res);
-
+
// Directive Parsing.
- bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+
+ // ".ascii", ".asciiz", ".string"
+ bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
bool ParseDirectiveFill(); // ".fill"
bool ParseDirectiveSpace(); // ".space"
- bool ParseDirectiveSet(); // ".set"
+ bool ParseDirectiveZero(); // ".zero"
+ bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
bool ParseDirectiveOrg(); // ".org"
// ".align{,32}", ".p2align{,w,l}"
bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
@@ -183,7 +196,6 @@ private:
/// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
/// accepts a single symbol (which should be a label or an external).
bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
- bool ParseDirectiveELFType(); // ELF specific ".type"
bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
@@ -191,6 +203,8 @@ private:
bool ParseDirectiveInclude(); // ".include"
bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ // ".ifdef" or ".ifndef", depending on expect_defined
+ bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
@@ -198,6 +212,9 @@ private:
/// ParseEscapedString - Parse the current token as a string which may include
/// escaped characters and return the string contents.
bool ParseEscapedString(std::string &Data);
+
+ const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
};
/// \brief Generic implementations of directive handling, etc. which is shared
@@ -208,7 +225,6 @@ class GenericAsmParser : public MCAsmParserExtension {
getParser().AddDirectiveHandler(this, Directive,
HandleDirective<GenericAsmParser, Handler>);
}
-
public:
GenericAsmParser() {}
@@ -224,6 +240,29 @@ public:
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
+
+ // CFI directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
+ ".cfi_startproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
+ ".cfi_endproc");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
+ ".cfi_def_cfa");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
+ ".cfi_def_cfa_offset");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
+ ".cfi_def_cfa_register");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
+ ".cfi_offset");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
// Macro directives.
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -233,15 +272,32 @@ public:
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
+
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
}
+ bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveLEB128(StringRef, SMLoc);
};
}
@@ -250,6 +306,7 @@ namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
}
@@ -269,7 +326,10 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
//
// FIXME: This is a hack, we need to (majorly) cleanup how these objects are
// created.
- if (_MAI.hasSubsectionsViaSymbols()) {
+ if (_MAI.hasMicrosoftFastStdCallMangling()) {
+ PlatformParser = createCOFFAsmParser();
+ PlatformParser->Initialize(*this);
+ } else if (_MAI.hasSubsectionsViaSymbols()) {
PlatformParser = createDarwinAsmParser();
PlatformParser->Initialize(*this);
} else {
@@ -299,30 +359,26 @@ void AsmParser::PrintMacroInstantiations() {
}
void AsmParser::Warning(SMLoc L, const Twine &Msg) {
- PrintMessage(L, Msg.str(), "warning");
+ PrintMessage(L, Msg, "warning");
PrintMacroInstantiations();
}
bool AsmParser::Error(SMLoc L, const Twine &Msg) {
- PrintMessage(L, Msg.str(), "error");
+ HadError = true;
+ PrintMessage(L, Msg, "error");
PrintMacroInstantiations();
return true;
}
-void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg,
- const char *Type) const {
- SrcMgr.PrintMessage(Loc, Msg, Type);
-}
-
bool AsmParser::EnterIncludeFile(const std::string &Filename) {
int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc());
if (NewBuf == -1)
return true;
-
+
CurBuffer = NewBuf;
-
+
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
-
+
return false;
}
@@ -333,7 +389,7 @@ void AsmParser::JumpToLoc(SMLoc Loc) {
const AsmToken &AsmParser::Lex() {
const AsmToken *tok = &Lexer.Lex();
-
+
if (tok->is(AsmToken::Eof)) {
// If this is the end of an included file, pop the parent file off the
// include stack.
@@ -343,35 +399,31 @@ const AsmToken &AsmParser::Lex() {
tok = &Lexer.Lex();
}
}
-
+
if (tok->is(AsmToken::Error))
Error(Lexer.getErrLoc(), Lexer.getErr());
-
+
return *tok;
}
bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
- //
- // FIXME: Target hook & command line option for initial section.
if (!NoInitialTextSection)
- Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ Out.InitSections();
// Prime the lexer.
Lex();
-
- bool HadError = false;
-
+
+ HadError = false;
AsmCond StartingCondState = TheCondState;
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
if (!ParseStatement()) continue;
-
- // We had an error, remember it and recover by skipping to the next line.
- HadError = true;
+
+ // We had an error, validate that one was emitted and recover by skipping to
+ // the next line.
+ assert(HadError && "Parse statement returned an error, but none emitted!");
EatToEndOfStatement();
}
@@ -383,26 +435,34 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
const std::vector<MCDwarfFile *> &MCDwarfFiles =
getContext().getMCDwarfFiles();
for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
- if (!MCDwarfFiles[i]){
+ if (!MCDwarfFiles[i])
TokError("unassigned file number: " + Twine(i) + " for .file directives");
- HadError = true;
- }
}
-
+
// Finalize the output stream if there are no errors and if the client wants
// us to.
- if (!HadError && !NoFinalize)
+ if (!HadError && !NoFinalize)
Out.Finish();
return HadError;
}
+void AsmParser::CheckForValidSection() {
+ if (!getStreamer().getCurrentSection()) {
+ TokError("expected section directive before assembly directive");
+ Out.SwitchSection(Ctx.getMachOSection(
+ "__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+ }
+}
+
/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
void AsmParser::EatToEndOfStatement() {
while (Lexer.isNot(AsmToken::EndOfStatement) &&
Lexer.isNot(AsmToken::Eof))
Lex();
-
+
// Eat EOL.
if (Lexer.is(AsmToken::EndOfStatement))
Lex();
@@ -433,6 +493,20 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ if (ParseExpression(Res)) return true;
+ if (Lexer.isNot(AsmToken::RBrac))
+ return TokError("expected ']' in brackets expression");
+ EndLoc = Lexer.getLoc();
+ Lex();
+ return false;
+}
+
/// ParsePrimaryExpr - Parse a primary expression and return it.
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
@@ -462,19 +536,21 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
std::pair<StringRef, StringRef> Split = Identifier.split('@');
MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
- // Mark the symbol as used in an expression.
- Sym->setUsedInExpr(true);
-
// Lookup the symbol variant if used.
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- if (Split.first.size() != Identifier.size())
+ if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+ if (Variant == MCSymbolRefExpr::VK_Invalid) {
+ Variant = MCSymbolRefExpr::VK_None;
+ return TokError("invalid variant '" + Split.second + "'");
+ }
+ }
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
if (Variant)
- return Error(EndLoc, "unexpected modified on variable reference");
+ return Error(EndLoc, "unexpected modifier on variable reference");
Res = Sym->getVariableValue();
return false;
@@ -506,6 +582,13 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
return false;
}
+ case AsmToken::Real: {
+ APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ Res = MCConstantExpr::Create(IntVal, getContext());
+ Lex(); // Eat token.
+ return false;
+ }
case AsmToken::Dot: {
// This is a '.' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
@@ -516,10 +599,12 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Lex(); // Eat identifier.
return false;
}
-
case AsmToken::LParen:
Lex(); // Eat the '('.
return ParseParenExpr(Res, EndLoc);
+ case AsmToken::LBrac:
+ Lex(); // Eat the '['.
+ return ParseBracketExpr(Res, EndLoc);
case AsmToken::Minus:
Lex(); // Eat the operator.
if (ParsePrimaryExpr(Res, EndLoc))
@@ -546,8 +631,57 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) {
return ParseExpression(Res, EndLoc);
}
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // if there is exactly one symbol.
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return 0;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+ TokError("invalid variant on expression '" +
+ getTok().getIdentifier() + "' (already modified)");
+ return E;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return 0;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+ if (!LHS && !RHS)
+ return 0;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
/// ParseExpression - Parse an expression and return it.
-///
+///
/// expr ::= expr +,- expr -> lowest.
/// expr ::= expr |,^,&,! expr -> middle.
/// expr ::= expr *,/,%,<<,>> expr -> highest.
@@ -559,6 +693,31 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
return true;
+ // As a special case, we support 'a op b @ modifier' by rewriting the
+ // expression to include the modifier. This is inefficient, but in general we
+ // expect users to use 'a@modifier op b'.
+ if (Lexer.getKind() == AsmToken::At) {
+ Lex();
+
+ if (Lexer.isNot(AsmToken::Identifier))
+ return TokError("unexpected symbol modifier following '@'");
+
+ MCSymbolRefExpr::VariantKind Variant =
+ MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+ if (Variant == MCSymbolRefExpr::VK_Invalid)
+ return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+ const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+ if (!ModifiedRes) {
+ return TokError("invalid modifier '" + getTok().getIdentifier() +
+ "' (no symbols present)");
+ return true;
+ }
+
+ Res = ModifiedRes;
+ Lex();
+ }
+
// Try to constant fold it up front, if possible.
int64_t Value;
if (Res->EvaluateAsAbsolute(Value))
@@ -575,7 +734,7 @@ bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
const MCExpr *Expr;
-
+
SMLoc StartLoc = Lexer.getLoc();
if (ParseExpression(Expr))
return true;
@@ -586,13 +745,13 @@ bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
return false;
}
-static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
MCBinaryExpr::Opcode &Kind) {
switch (K) {
default:
return 0; // not a binop.
- // Lowest Precedence: &&, ||
+ // Lowest Precedence: &&, ||, @
case AsmToken::AmpAmp:
Kind = MCBinaryExpr::LAnd;
return 1;
@@ -600,62 +759,65 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
Kind = MCBinaryExpr::LOr;
return 1;
- // Low Precedence: +, -, ==, !=, <>, <, <=, >, >=
- case AsmToken::Plus:
- Kind = MCBinaryExpr::Add;
+
+ // Low Precedence: |, &, ^
+ //
+ // FIXME: gas seems to support '!' as an infix operator?
+ case AsmToken::Pipe:
+ Kind = MCBinaryExpr::Or;
return 2;
- case AsmToken::Minus:
- Kind = MCBinaryExpr::Sub;
+ case AsmToken::Caret:
+ Kind = MCBinaryExpr::Xor;
+ return 2;
+ case AsmToken::Amp:
+ Kind = MCBinaryExpr::And;
return 2;
+
+ // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
case AsmToken::EqualEqual:
Kind = MCBinaryExpr::EQ;
- return 2;
+ return 3;
case AsmToken::ExclaimEqual:
case AsmToken::LessGreater:
Kind = MCBinaryExpr::NE;
- return 2;
+ return 3;
case AsmToken::Less:
Kind = MCBinaryExpr::LT;
- return 2;
+ return 3;
case AsmToken::LessEqual:
Kind = MCBinaryExpr::LTE;
- return 2;
+ return 3;
case AsmToken::Greater:
Kind = MCBinaryExpr::GT;
- return 2;
+ return 3;
case AsmToken::GreaterEqual:
Kind = MCBinaryExpr::GTE;
- return 2;
-
- // Intermediate Precedence: |, &, ^
- //
- // FIXME: gas seems to support '!' as an infix operator?
- case AsmToken::Pipe:
- Kind = MCBinaryExpr::Or;
- return 3;
- case AsmToken::Caret:
- Kind = MCBinaryExpr::Xor;
- return 3;
- case AsmToken::Amp:
- Kind = MCBinaryExpr::And;
return 3;
+ // High Intermediate Precedence: +, -
+ case AsmToken::Plus:
+ Kind = MCBinaryExpr::Add;
+ return 4;
+ case AsmToken::Minus:
+ Kind = MCBinaryExpr::Sub;
+ return 4;
+
// Highest Precedence: *, /, %, <<, >>
case AsmToken::Star:
Kind = MCBinaryExpr::Mul;
- return 4;
+ return 5;
case AsmToken::Slash:
Kind = MCBinaryExpr::Div;
- return 4;
+ return 5;
case AsmToken::Percent:
Kind = MCBinaryExpr::Mod;
- return 4;
+ return 5;
case AsmToken::LessLess:
Kind = MCBinaryExpr::Shl;
- return 4;
+ return 5;
case AsmToken::GreaterGreater:
Kind = MCBinaryExpr::Shr;
- return 4;
+ return 5;
}
}
@@ -667,18 +829,18 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
while (1) {
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
-
+
// If the next token is lower precedence than we are allowed to eat, return
// successfully with what we ate already.
if (TokPrec < Precedence)
return false;
-
+
Lex();
-
+
// Eat the next primary expression.
const MCExpr *RHS;
if (ParsePrimaryExpr(RHS, EndLoc)) return true;
-
+
// If BinOp binds less tightly with RHS than the operator after RHS, let
// the pending operator take RHS as its LHS.
MCBinaryExpr::Opcode Dummy;
@@ -692,9 +854,9 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
}
}
-
-
-
+
+
+
/// ParseStatement:
/// ::= EndOfStatement
/// ::= Label* Directive ...Operands... EndOfStatement
@@ -706,12 +868,17 @@ bool AsmParser::ParseStatement() {
return false;
}
- // Statements always start with an identifier.
+ // Statements always start with an identifier or are a full line comment.
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
int64_t LocalLabelVal = -1;
- // GUESS allow an integer followed by a ':' as a directional local label
+  // A full line comment starts with a '#' as the first token.
+ if (Lexer.is(AsmToken::Hash)) {
+ EatToEndOfStatement();
+ return false;
+ }
+ // Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
LocalLabelVal = getTok().getIntVal();
if (LocalLabelVal < 0) {
@@ -739,24 +906,30 @@ bool AsmParser::ParseStatement() {
// example.
if (IDVal == ".if")
return ParseDirectiveIf(IDLoc);
+ if (IDVal == ".ifdef")
+ return ParseDirectiveIfdef(IDLoc, true);
+ if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
+ return ParseDirectiveIfdef(IDLoc, false);
if (IDVal == ".elseif")
return ParseDirectiveElseIf(IDLoc);
if (IDVal == ".else")
return ParseDirectiveElse(IDLoc);
if (IDVal == ".endif")
return ParseDirectiveEndIf(IDLoc);
-
+
// If we are in a ".if 0" block, ignore this statement.
if (TheCondState.Ignore) {
EatToEndOfStatement();
return false;
}
-
+
// FIXME: Recurse on local labels?
// See what kind of statement we have.
switch (Lexer.getKind()) {
case AsmToken::Colon: {
+ CheckForValidSection();
+
// identifier ':' -> Label.
Lex();
@@ -772,10 +945,10 @@ bool AsmParser::ParseStatement() {
Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
if (!Sym->isUndefined() || Sym->isVariable())
return Error(IDLoc, "invalid symbol redefinition");
-
+
// Emit the label.
Out.EmitLabel(Sym);
-
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -791,7 +964,7 @@ bool AsmParser::ParseStatement() {
// identifier '=' ... -> assignment statement
Lex();
- return ParseAssignment(IDVal);
+ return ParseAssignment(IDVal, true);
default: // Normal instruction or directive.
break;
@@ -802,27 +975,43 @@ bool AsmParser::ParseStatement() {
if (const Macro *M = MacroMap.lookup(IDVal))
return HandleMacroEntry(IDVal, IDLoc, M);
- // Otherwise, we have a normal instruction or directive.
+ // Otherwise, we have a normal instruction or directive.
if (IDVal[0] == '.') {
// Assembler features
- if (IDVal == ".set")
- return ParseDirectiveSet();
+ if (IDVal == ".set" || IDVal == ".equ")
+ return ParseDirectiveSet(IDVal, true);
+ if (IDVal == ".equiv")
+ return ParseDirectiveSet(IDVal, false);
// Data directives
if (IDVal == ".ascii")
- return ParseDirectiveAscii(false);
- if (IDVal == ".asciz")
- return ParseDirectiveAscii(true);
+ return ParseDirectiveAscii(IDVal, false);
+ if (IDVal == ".asciz" || IDVal == ".string")
+ return ParseDirectiveAscii(IDVal, true);
if (IDVal == ".byte")
return ParseDirectiveValue(1);
if (IDVal == ".short")
return ParseDirectiveValue(2);
+ if (IDVal == ".value")
+ return ParseDirectiveValue(2);
+ if (IDVal == ".2byte")
+ return ParseDirectiveValue(2);
if (IDVal == ".long")
return ParseDirectiveValue(4);
+ if (IDVal == ".int")
+ return ParseDirectiveValue(4);
+ if (IDVal == ".4byte")
+ return ParseDirectiveValue(4);
if (IDVal == ".quad")
return ParseDirectiveValue(8);
+ if (IDVal == ".8byte")
+ return ParseDirectiveValue(8);
+ if (IDVal == ".single" || IDVal == ".float")
+ return ParseDirectiveRealValue(APFloat::IEEEsingle);
+ if (IDVal == ".double")
+ return ParseDirectiveRealValue(APFloat::IEEEdouble);
if (IDVal == ".align") {
bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
@@ -852,11 +1041,16 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveFill();
if (IDVal == ".space")
return ParseDirectiveSpace();
+ if (IDVal == ".zero")
+ return ParseDirectiveZero();
// Symbol attribute directives
if (IDVal == ".globl" || IDVal == ".global")
return ParseDirectiveSymbolAttribute(MCSA_Global);
+ // ELF only? Should it be here?
+ if (IDVal == ".local")
+ return ParseDirectiveSymbolAttribute(MCSA_Local);
if (IDVal == ".hidden")
return ParseDirectiveSymbolAttribute(MCSA_Hidden);
if (IDVal == ".indirect_symbol")
@@ -867,14 +1061,14 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveSymbolAttribute(MCSA_LazyReference);
if (IDVal == ".no_dead_strip")
return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip);
+ if (IDVal == ".symbol_resolver")
+ return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver);
if (IDVal == ".private_extern")
return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern);
if (IDVal == ".protected")
return ParseDirectiveSymbolAttribute(MCSA_Protected);
if (IDVal == ".reference")
return ParseDirectiveSymbolAttribute(MCSA_Reference);
- if (IDVal == ".type")
- return ParseDirectiveELFType();
if (IDVal == ".weak")
return ParseDirectiveSymbolAttribute(MCSA_Weak);
if (IDVal == ".weak_definition")
@@ -894,6 +1088,9 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".include")
return ParseDirectiveInclude();
+ if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64")
+ return TokError(Twine(IDVal) + " not supported yet");
+
// Look up the handler in the handler table.
std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
DirectiveMap.lookup(IDVal);
@@ -909,16 +1106,16 @@ bool AsmParser::ParseStatement() {
return false;
}
+ CheckForValidSection();
+
// Canonicalize the opcode to lower case.
SmallString<128> Opcode;
for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
Opcode.push_back(tolower(IDVal[i]));
-
+
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
ParsedOperands);
- if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
- HadError = TokError("unexpected token in argument list");
// Dump the parsed representation, if requested.
if (getShowParsedOperands()) {
@@ -936,25 +1133,17 @@ bool AsmParser::ParseStatement() {
}
// If parsing succeeded, match the instruction.
- if (!HadError) {
- MCInst Inst;
- if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) {
- // Emit the instruction on success.
- Out.EmitInstruction(Inst);
- } else
- HadError = true;
- }
-
- // If there was no error, consume the end-of-statement token. Otherwise this
- // will be done by our caller.
if (!HadError)
- Lex();
+ HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
+ Out);
// Free any parsed operands.
for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
delete ParsedOperands[i];
- return HadError;
+  // Don't skip the rest of the line; the instruction parser is responsible
+  // for that.
+ return false;
}
MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
@@ -1083,14 +1272,35 @@ void AsmParser::HandleMacroExit() {
ActiveMacros.pop_back();
}
-bool AsmParser::ParseAssignment(StringRef Name) {
+static void MarkUsed(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Binary:
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
+ MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+ break;
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ break;
+ case MCExpr::SymbolRef: {
+ static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
+ break;
+ }
+ case MCExpr::Unary:
+ MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+ break;
+ }
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
const MCExpr *Value;
if (ParseExpression(Value))
return true;
-
+
+ MarkUsed(Value);
+
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in assignment");
@@ -1105,22 +1315,23 @@ bool AsmParser::ParseAssignment(StringRef Name) {
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (Sym->isUndefined() && !Sym->isUsedInExpr())
+ if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
- else if (!Sym->isUndefined() && !Sym->isAbsolute())
+ else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef))
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
return Error(EqualLoc, "invalid assignment to '" + Name + "'");
else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
Name + "'");
+
+ // Don't count these checks as uses.
+ Sym->setUsed(false);
} else
Sym = getContext().GetOrCreateSymbol(Name);
// FIXME: Handle '.'.
- Sym->setUsedInExpr(true);
-
// Do the assignment.
Out.EmitAssignment(Sym, Value);
@@ -1167,18 +1378,20 @@ bool AsmParser::ParseIdentifier(StringRef &Res) {
}
/// ParseDirectiveSet:
+/// ::= .equ identifier ',' expression
+/// ::= .equiv identifier ',' expression
/// ::= .set identifier ',' expression
-bool AsmParser::ParseDirectiveSet() {
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
StringRef Name;
if (ParseIdentifier(Name))
- return TokError("expected identifier after '.set' directive");
-
+ return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.set'");
+ return TokError("unexpected token in '" + Twine(IDVal) + "'");
Lex();
- return ParseAssignment(Name);
+ return ParseAssignment(Name, allow_redef);
}
bool AsmParser::ParseEscapedString(std::string &Data) {
@@ -1240,12 +1453,14 @@ bool AsmParser::ParseEscapedString(std::string &Data) {
}
/// ParseDirectiveAscii:
-/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
-bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
+/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
for (;;) {
if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in '.ascii' or '.asciz' directive");
+ return TokError("expected string in '" + Twine(IDVal) + "' directive");
std::string Data;
if (ParseEscapedString(Data))
@@ -1261,7 +1476,7 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
break;
if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.ascii' or '.asciz' directive");
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
Lex();
}
}
@@ -1274,9 +1489,10 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
bool AsmParser::ParseDirectiveValue(unsigned Size) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
for (;;) {
const MCExpr *Value;
- SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc();
if (ParseExpression(Value))
return true;
@@ -1288,7 +1504,7 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -1300,9 +1516,61 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
return false;
}
+/// ParseDirectiveRealValue
+/// ::= (.single | .double) [ expression (, expression)* ]
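+///
+/// For example, '.double 0.0, -2.5' emits two 8-byte IEEE-754 values; only a
+/// leading '+' or '-' sign is accepted, not general floating point arithmetic.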
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ CheckForValidSection();
+
+ for (;;) {
+ // We don't truly support arithmetic on floating point expressions, so we
+ // have to manually parse unary prefixes.
+ bool IsNeg = false;
+ if (getLexer().is(AsmToken::Minus)) {
+ Lex();
+ IsNeg = true;
+ } else if (getLexer().is(AsmToken::Plus))
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Integer) &&
+ getLexer().isNot(AsmToken::Real))
+ return TokError("unexpected token in directive");
+
+ // Convert to an APFloat.
+ APFloat Value(Semantics);
+ if (Value.convertFromString(getTok().getString(),
+ APFloat::rmNearestTiesToEven) ==
+ APFloat::opInvalidOp)
+ return TokError("invalid floating point literal");
+ if (IsNeg)
+ Value.changeSign();
+
+ // Consume the numeric token.
+ Lex();
+
+ // Emit the value as an integer.
+ APInt AsInt = Value.bitcastToAPInt();
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
/// ParseDirectiveSpace
/// ::= .space expression [ , expression ]
bool AsmParser::ParseDirectiveSpace() {
+ CheckForValidSection();
+
int64_t NumBytes;
if (ParseAbsoluteExpression(NumBytes))
return true;
@@ -1312,7 +1580,7 @@ bool AsmParser::ParseDirectiveSpace() {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.space' directive");
Lex();
-
+
if (ParseAbsoluteExpression(FillExpr))
return true;
@@ -1331,9 +1599,37 @@ bool AsmParser::ParseDirectiveSpace() {
return false;
}
+/// ParseDirectiveZero
+/// ::= .zero expression [ , expression ]
+bool AsmParser::ParseDirectiveZero() {
+ CheckForValidSection();
+
+ int64_t NumBytes;
+ if (ParseAbsoluteExpression(NumBytes))
+ return true;
+
+ int64_t Val = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (ParseAbsoluteExpression(Val))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zero' directive");
+
+ Lex();
+
+ getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE);
+
+ return false;
+}
+
/// ParseDirectiveFill
/// ::= .fill expression , expression , expression
bool AsmParser::ParseDirectiveFill() {
+ CheckForValidSection();
+
int64_t NumValues;
if (ParseAbsoluteExpression(NumValues))
return true;
@@ -1341,7 +1637,7 @@ bool AsmParser::ParseDirectiveFill() {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
-
+
int64_t FillSize;
if (ParseAbsoluteExpression(FillSize))
return true;
@@ -1349,14 +1645,14 @@ bool AsmParser::ParseDirectiveFill() {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
-
+
int64_t FillExpr;
if (ParseAbsoluteExpression(FillExpr))
return true;
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.fill' directive");
-
+
Lex();
if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8)
@@ -1371,6 +1667,8 @@ bool AsmParser::ParseDirectiveFill() {
/// ParseDirectiveOrg
/// ::= .org expression [ , expression ]
bool AsmParser::ParseDirectiveOrg() {
+ CheckForValidSection();
+
const MCExpr *Offset;
if (ParseExpression(Offset))
return true;
@@ -1381,7 +1679,7 @@ bool AsmParser::ParseDirectiveOrg() {
if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.org' directive");
Lex();
-
+
if (ParseAbsoluteExpression(FillExpr))
return true;
@@ -1401,6 +1699,8 @@ bool AsmParser::ParseDirectiveOrg() {
/// ParseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
+ CheckForValidSection();
+
SMLoc AlignmentLoc = getLexer().getLoc();
int64_t Alignment;
if (ParseAbsoluteExpression(Alignment))
@@ -1432,7 +1732,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
MaxBytesLoc = getLexer().getLoc();
if (ParseAbsoluteExpression(MaxBytesToFill))
return true;
-
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
}
@@ -1471,12 +1771,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// Check whether we should use optimal code alignment for this .align
// directive.
- //
- // FIXME: This should be using a target hook.
- bool UseCodeAlign = false;
- if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
- getStreamer().getCurrentSection()))
- UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
@@ -1498,7 +1793,7 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
if (ParseIdentifier(Name))
return TokError("expected identifier in directive");
-
+
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
getStreamer().EmitSymbolAttribute(Sym, Attr);
@@ -1513,63 +1808,19 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
}
Lex();
- return false;
-}
-
-/// ParseDirectiveELFType
-/// ::= .type identifier , @attribute
-bool AsmParser::ParseDirectiveELFType() {
- StringRef Name;
- if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
-
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
-
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.type' directive");
- Lex();
-
- if (getLexer().isNot(AsmToken::At))
- return TokError("expected '@' before type");
- Lex();
-
- StringRef Type;
- SMLoc TypeLoc;
-
- TypeLoc = getLexer().getLoc();
- if (ParseIdentifier(Type))
- return TokError("expected symbol type in directive");
-
- MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
- .Case("function", MCSA_ELF_TypeFunction)
- .Case("object", MCSA_ELF_TypeObject)
- .Case("tls_object", MCSA_ELF_TypeTLS)
- .Case("common", MCSA_ELF_TypeCommon)
- .Case("notype", MCSA_ELF_TypeNoType)
- .Default(MCSA_Invalid);
-
- if (Attr == MCSA_Invalid)
- return Error(TypeLoc, "unsupported attribute in '.type' directive");
-
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.type' directive");
-
- Lex();
-
- getStreamer().EmitSymbolAttribute(Sym, Attr);
-
return false;
}
/// ParseDirectiveComm
/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
bool AsmParser::ParseDirectiveComm(bool IsLocal) {
+ CheckForValidSection();
+
SMLoc IDLoc = getLexer().getLoc();
StringRef Name;
if (ParseIdentifier(Name))
return TokError("expected identifier in directive");
-
+
// Handle the identifier as the key symbol.
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
@@ -1589,7 +1840,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
Pow2AlignmentLoc = getLexer().getLoc();
if (ParseAbsoluteExpression(Pow2Alignment))
return true;
-
+
// If this target takes alignments in bytes (not log) validate and convert.
if (Lexer.getMAI().getAlignmentIsInBytes()) {
if (!isPowerOf2_64(Pow2Alignment))
@@ -1597,10 +1848,10 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
Pow2Alignment = Log2_64(Pow2Alignment);
}
}
-
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.comm' or '.lcomm' directive");
-
+
Lex();
  // NOTE: a size of zero for a .comm should create an undefined symbol
@@ -1659,17 +1910,17 @@ bool AsmParser::ParseDirectiveAbort() {
bool AsmParser::ParseDirectiveInclude() {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.include' directive");
-
+
std::string Filename = getTok().getString();
SMLoc IncludeLoc = getLexer().getLoc();
Lex();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.include' directive");
-
+
// Strip the quotes.
Filename = Filename.substr(1, Filename.size()-2);
-
+
// Attempt to switch the lexer to the included file before consuming the end
// of statement to avoid losing it when we switch.
if (EnterIncludeFile(Filename)) {
@@ -1695,7 +1946,7 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.if' directive");
-
+
Lex();
TheCondState.CondMet = ExprValue;
@@ -1705,6 +1956,31 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
return false;
}
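+/// ParseDirectiveIfdef
+/// ::= ( .ifdef | .ifndef | .ifnotdef ) identifier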
+bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
+ StringRef Name;
+ TheCondStack.push_back(TheCondState);
+ TheCondState.TheCond = AsmCond::IfCond;
+
+ if (TheCondState.Ignore) {
+ EatToEndOfStatement();
+ } else {
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier after '.ifdef'");
+
+ Lex();
+
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+
+ if (expect_defined)
+ TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ else
+ TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.Ignore = !TheCondState.CondMet;
+ }
+
+ return false;
+}
+
/// ParseDirectiveElseIf
/// ::= .elseif expression
bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
@@ -1728,7 +2004,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.elseif' directive");
-
+
Lex();
TheCondState.CondMet = ExprValue;
TheCondState.Ignore = !TheCondState.CondMet;
@@ -1742,7 +2018,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.else' directive");
-
+
Lex();
if (TheCondState.TheCond != AsmCond::IfCond &&
@@ -1766,7 +2042,7 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.endif' directive");
-
+
Lex();
if ((TheCondState.TheCond == AsmCond::NoCond) ||
@@ -1808,9 +2084,8 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
- if (getContext().GetDwarfFile(Filename, FileNumber) == 0)
- Error(FileNumberLoc, "file number already allocated");
- getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+ Error(FileNumberLoc, "file number already allocated");
}
return false;
@@ -1851,7 +2126,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
int64_t FileNumber = getTok().getIntVal();
if (FileNumber < 1)
return TokError("file number less than one in '.loc' directive");
- if (!getContext().ValidateDwarfFileNumber(FileNumber))
+ if (!getContext().isValidDwarfFileNumber(FileNumber))
return TokError("unassigned file number in '.loc' directive");
Lex();
@@ -1871,8 +2146,9 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
Lex();
}
- unsigned Flags = 0;
+ unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
unsigned Isa = 0;
+ int64_t Discriminator = 0;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
if (getLexer().is(AsmToken::EndOfStatement))
@@ -1903,7 +2179,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
Flags |= DWARF2_FLAG_IS_STMT;
else
return Error(Loc, "is_stmt value not 0 or 1");
- }
+ }
else {
return Error(Loc, "is_stmt value not the constant value of 0 or 1");
}
@@ -1919,11 +2195,15 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
if (Value < 0)
return Error(Loc, "isa number less than zero");
Isa = Value;
- }
+ }
else {
return Error(Loc, "isa number not a constant value");
}
}
+ else if (Name == "discriminator") {
+ if (getParser().ParseAbsoluteExpression(Discriminator))
+ return true;
+ }
else {
return Error(Loc, "unknown sub-directive in '.loc' directive");
}
@@ -1933,11 +2213,176 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
}
}
- getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,Isa);
+ getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+ Isa, Discriminator);
return false;
}
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ return TokError("unsupported directive '" + Directive + "'");
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
+ SMLoc DirectiveLoc) {
+ return getStreamer().EmitCFIStartProc();
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+ return getStreamer().EmitCFIEndProc();
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
+bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register,
+ SMLoc DirectiveLoc) {
+ unsigned RegNo;
+
+ if (getLexer().is(AsmToken::Percent)) {
+ if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc,
+ DirectiveLoc))
+ return true;
+ Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true);
+ } else
+ return getParser().ParseAbsoluteExpression(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIDefCfa
+/// ::= .cfi_def_cfa register, offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ return getStreamer().EmitCFIDefCfa(Register, Offset);
+}
+
+/// ParseDirectiveCFIDefCfaOffset
+/// ::= .cfi_def_cfa_offset offset
+bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Offset = 0;
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ return getStreamer().EmitCFIDefCfaOffset(Offset);
+}
+
+/// ParseDirectiveCFIDefCfaRegister
+/// ::= .cfi_def_cfa_register register
+bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ return getStreamer().EmitCFIDefCfaRegister(Register);
+}
+
+/// ParseDirectiveCFIOffset
+/// ::= .cfi_offset register, offset
+bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ int64_t Offset = 0;
+
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(Offset))
+ return true;
+
+ return getStreamer().EmitCFIOffset(Register, Offset);
+}
+
+static bool isValidEncoding(int64_t Encoding) {
+ if (Encoding & ~0xff)
+ return false;
+
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return true;
+
+ const unsigned Format = Encoding & 0xf;
+ if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
+ Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
+ Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
+ Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
+ return false;
+
+ const unsigned Application = Encoding & 0x70;
+ if (Application != dwarf::DW_EH_PE_absptr &&
+ Application != dwarf::DW_EH_PE_pcrel)
+ return false;
+
+ return true;
+}
+
+/// ParseDirectiveCFIPersonalityOrLsda
+/// ::= .cfi_personality encoding, [symbol_name]
+/// ::= .cfi_lsda encoding, [symbol_name]
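+///
+/// e.g. '.cfi_personality 0x9b, __gxx_personality_v0' (illustrative symbol;
+/// 0x9b is DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4).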
+bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Encoding = 0;
+ if (getParser().ParseAbsoluteExpression(Encoding))
+ return true;
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return false;
+
+ if (!isValidEncoding(Encoding))
+ return TokError("unsupported encoding.");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (IDVal == ".cfi_personality")
+ return getStreamer().EmitCFIPersonality(Sym, Encoding);
+ else {
+ assert(IDVal == ".cfi_lsda");
+ return getStreamer().EmitCFILsda(Sym, Encoding);
+ }
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ return getStreamer().EmitCFIRememberState();
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_restore_state
+bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ return getStreamer().EmitCFIRestoreState();
+}
+
/// ParseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
@@ -2022,6 +2467,26 @@ bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
"no current macro definition");
}
+bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+ getParser().CheckForValidSection();
+
+ const MCExpr *Value;
+
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ if (DirName[1] == 's')
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
+
+ return false;
+}
+
+
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
MCContext &C, MCStreamer &Out,
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
new file mode 100644
index 0000000..5ecab03
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -0,0 +1,144 @@
+//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/COFF.h"
+using namespace llvm;
+
+namespace {
+
+class COFFAsmParser : public MCAsmParserExtension {
+ template<bool (COFFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<COFFAsmParser, Handler>);
+ }
+
+ bool ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind);
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ MCAsmParserExtension::Initialize(Parser);
+
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+ }
+
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ }
+
+ bool ParseDirectiveDef(StringRef, SMLoc);
+ bool ParseDirectiveScl(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveEndef(StringRef, SMLoc);
+
+public:
+ COFFAsmParser() {}
+};
+
+} // end anonymous namespace.
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getCOFFSection(
+ Section, Characteristics, Kind));
+
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+ StringRef SymbolName;
+
+ if (getParser().ParseIdentifier(SymbolName))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+ getStreamer().BeginCOFFSymbolDef(Sym);
+
+ Lex();
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+ int64_t SymbolStorageClass;
+ if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ int64_t Type;
+ if (getParser().ParseAbsoluteExpression(Type))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ Lex();
+ getStreamer().EmitCOFFSymbolType(Type);
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+ Lex();
+ getStreamer().EndCOFFSymbolDef();
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+ return new COFFAsmParser;
+}
+
+}
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 54ddb44..44f2345 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -305,7 +305,7 @@ bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
//
// FIXME: This isn't really what 'as' does; I think it just uses the implicit
// alignment on the section (e.g., if one manually inserts bytes into the
- // section, then just issueing the section switch directive will not realign
+ // section, then just issuing the section switch directive will not realign
// the section. However, this is arguably more reasonable behavior, and there
// is no good reason for someone to intentionally emit incorrectly sized
// values into the implicitly aligned sections.
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index f982fda..bfaf36a 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -8,13 +8,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
namespace {
@@ -47,72 +49,86 @@ public:
AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128");
- AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128");
AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
}
+ // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+ // the best way for us to get access to it?
bool ParseSectionDirectiveData(StringRef, SMLoc) {
- return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getDataRel());
}
bool ParseSectionDirectiveText(StringRef, SMLoc) {
- return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_EXECINSTR |
- MCSectionELF::SHF_ALLOC, SectionKind::getText());
+ return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
}
bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
- return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |
- MCSectionELF::SHF_ALLOC, SectionKind::getBSS());
+ return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
}
bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
- return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
+ return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
SectionKind::getReadOnly());
}
bool ParseSectionDirectiveTData(StringRef, SMLoc) {
- return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
SectionKind::getThreadData());
}
bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
- return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_TLS | ELF::SHF_WRITE,
SectionKind::getThreadBSS());
}
bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
- return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
SectionKind::getDataRel());
}
bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
- return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRel());
}
bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
- return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRelLocal());
}
bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
- return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_WRITE,
+ return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_WRITE,
SectionKind::getDataRel());
}
- bool ParseDirectiveLEB128(StringRef, SMLoc);
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
bool ParseDirectiveSection(StringRef, SMLoc);
bool ParseDirectiveSize(StringRef, SMLoc);
bool ParseDirectivePrevious(StringRef, SMLoc);
+ bool ParseDirectiveType(StringRef, SMLoc);
+ bool ParseDirectiveIdent(StringRef, SMLoc);
+ bool ParseDirectiveSymver(StringRef, SMLoc);
+ bool ParseDirectiveWeakref(StringRef, SMLoc);
+
+private:
+ bool ParseSectionName(StringRef &SectionName);
};
}
@@ -150,135 +166,359 @@ bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
return false;
}
-// FIXME: This is a work in progress.
-bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
- StringRef SectionName;
- // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly.
- if (getParser().ParseIdentifier(SectionName))
- return TokError("expected identifier in directive");
-
- std::string FlagsStr;
- StringRef TypeName;
- int64_t Size = 0;
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
-
- if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in directive");
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+ // A section name can contain -, so we cannot just use
+ // ParseIdentifier.
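+  // (e.g. a name such as ".note.GNU-stack" must be accepted in one piece).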
+ SMLoc FirstLoc = getLexer().getLoc();
+ unsigned Size = 0;
- FlagsStr = getTok().getStringContents();
+ if (getLexer().is(AsmToken::String)) {
+ SectionName = getTok().getIdentifier();
Lex();
+ return false;
+ }
- AsmToken::TokenKind TypeStartToken;
- if (getContext().getAsmInfo().getCommentString()[0] == '@')
- TypeStartToken = AsmToken::Percent;
- else
- TypeStartToken = AsmToken::At;
+ for (;;) {
+ StringRef Tmp;
+ unsigned CurSize;
- if (getLexer().is(AsmToken::Comma)) {
+ SMLoc PrevLoc = getLexer().getLoc();
+ if (getLexer().is(AsmToken::Minus)) {
+ CurSize = 1;
+ Lex(); // Consume the "-".
+ } else if (getLexer().is(AsmToken::String)) {
+ CurSize = getTok().getIdentifier().size() + 2;
Lex();
- if (getLexer().is(TypeStartToken)) {
- Lex();
- if (getParser().ParseIdentifier(TypeName))
- return TokError("expected identifier in directive");
-
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ CurSize = getTok().getIdentifier().size();
+ Lex();
+ } else {
+ break;
+ }
- if (getParser().ParseAbsoluteExpression(Size))
- return true;
+ Size += CurSize;
+ SectionName = StringRef(FirstLoc.getPointer(), Size);
- if (Size <= 0)
- return TokError("section size must be positive");
- }
- }
- }
+ // Make sure the following token is adjacent.
+ if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer())
+ break;
}
+ if (Size == 0)
+ return true;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ return false;
+}
- unsigned Flags = 0;
- for (unsigned i = 0; i < FlagsStr.size(); i++) {
- switch (FlagsStr[i]) {
+static SectionKind computeSectionKind(unsigned Flags) {
+ if (Flags & ELF::SHF_EXECINSTR)
+ return SectionKind::getText();
+ if (Flags & ELF::SHF_TLS)
+ return SectionKind::getThreadData();
+ return SectionKind::getDataRel();
+}
+
+static int parseSectionFlags(StringRef flagsStr) {
+ int flags = 0;
+
+ for (unsigned i = 0; i < flagsStr.size(); i++) {
+ switch (flagsStr[i]) {
case 'a':
- Flags |= MCSectionELF::SHF_ALLOC;
+ flags |= ELF::SHF_ALLOC;
break;
case 'x':
- Flags |= MCSectionELF::SHF_EXECINSTR;
+ flags |= ELF::SHF_EXECINSTR;
break;
case 'w':
- Flags |= MCSectionELF::SHF_WRITE;
+ flags |= ELF::SHF_WRITE;
break;
case 'M':
- Flags |= MCSectionELF::SHF_MERGE;
+ flags |= ELF::SHF_MERGE;
break;
case 'S':
- Flags |= MCSectionELF::SHF_STRINGS;
+ flags |= ELF::SHF_STRINGS;
break;
case 'T':
- Flags |= MCSectionELF::SHF_TLS;
+ flags |= ELF::SHF_TLS;
break;
case 'c':
- Flags |= MCSectionELF::XCORE_SHF_CP_SECTION;
+ flags |= ELF::XCORE_SHF_CP_SECTION;
break;
case 'd':
- Flags |= MCSectionELF::XCORE_SHF_DP_SECTION;
+ flags |= ELF::XCORE_SHF_DP_SECTION;
+ break;
+ case 'G':
+ flags |= ELF::SHF_GROUP;
break;
default:
+ return -1;
+ }
+ }
+
+ return flags;
+}
+
+bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
+ getStreamer().PushSection();
+
+ if (ParseDirectiveSection(s, loc)) {
+ getStreamer().PopSection();
+ return true;
+ }
+
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().PopSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+
+ if (ParseSectionName(SectionName))
+ return TokError("expected identifier in directive");
+
+ StringRef TypeName;
+ int64_t Size = 0;
+ StringRef GroupName;
+ unsigned Flags = 0;
+
+ // Set the defaults first.
+ if (SectionName == ".fini" || SectionName == ".init" ||
+ SectionName == ".rodata")
+ Flags |= ELF::SHF_ALLOC;
+ if (SectionName == ".fini" || SectionName == ".init")
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ StringRef FlagsStr = getTok().getStringContents();
+ Lex();
+
+ int extraFlags = parseSectionFlags(FlagsStr);
+ if (extraFlags < 0)
return TokError("unknown flag");
+ Flags |= extraFlags;
+
+ bool Mergeable = Flags & ELF::SHF_MERGE;
+ bool Group = Flags & ELF::SHF_GROUP;
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ if (Mergeable)
+ return TokError("Mergeable section must specify the type");
+ if (Group)
+ return TokError("Group section must specify the type");
+ } else {
+ Lex();
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+
+ Lex();
+ if (getParser().ParseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (Mergeable) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected the entry size");
+ Lex();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+ if (Size <= 0)
+ return TokError("entry size must be positive");
+ }
+
+ if (Group) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (getParser().ParseIdentifier(GroupName))
+ return true;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (getParser().ParseIdentifier(Linkage))
+ return true;
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
+ }
+ }
}
}
- unsigned Type = MCSectionELF::SHT_NULL;
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Type = ELF::SHT_PROGBITS;
+
if (!TypeName.empty()) {
if (TypeName == "init_array")
- Type = MCSectionELF::SHT_INIT_ARRAY;
+ Type = ELF::SHT_INIT_ARRAY;
else if (TypeName == "fini_array")
- Type = MCSectionELF::SHT_FINI_ARRAY;
+ Type = ELF::SHT_FINI_ARRAY;
else if (TypeName == "preinit_array")
- Type = MCSectionELF::SHT_PREINIT_ARRAY;
+ Type = ELF::SHT_PREINIT_ARRAY;
else if (TypeName == "nobits")
- Type = MCSectionELF::SHT_NOBITS;
+ Type = ELF::SHT_NOBITS;
else if (TypeName == "progbits")
- Type = MCSectionELF::SHT_PROGBITS;
+ Type = ELF::SHT_PROGBITS;
+ else if (TypeName == "note")
+ Type = ELF::SHT_NOTE;
+ else if (TypeName == "unwind")
+ Type = ELF::SHT_X86_64_UNWIND;
else
return TokError("unknown section type");
}
- SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR)
- ? SectionKind::getText()
- : SectionKind::getDataRel();
+ SectionKind Kind = computeSectionKind(Flags);
getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
- Flags, Kind, false));
+ Flags, Kind, Size,
+ GroupName));
return false;
}
-bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
- int64_t Value;
- if (getParser().ParseAbsoluteExpression(Value))
- return true;
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection == NULL)
+ return TokError(".previous without corresponding .section");
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
+/// ParseDirectiveELFType
+/// ::= .type identifier , @attribute
+bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.type' directive");
+ Lex();
+
+ if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+ return TokError("expected '@' or '%' before type");
+ Lex();
+
+ StringRef Type;
+ SMLoc TypeLoc;
+
+ TypeLoc = getLexer().getLoc();
+ if (getParser().ParseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Default(MCSA_Invalid);
+
+ if (Attr == MCSA_Invalid)
+ return Error(TypeLoc, "unsupported attribute in '.type' directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ return TokError("unexpected token in '.type' directive");
- // FIXME: Add proper MC support.
- if (getContext().getAsmInfo().hasLEB128()) {
- if (DirName[1] == 's')
- getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value));
- else
- getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value));
- return false;
- }
- // FIXME: This shouldn't be an error!
- return TokError("LEB128 not supported yet");
+ Lex();
+
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+ return false;
}
-bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
- const MCSection *PreviousSection = getStreamer().getPreviousSection();
- if (PreviousSection != NULL)
- getStreamer().SwitchSection(PreviousSection);
+/// ParseDirectiveIdent
+/// ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.ident' directive");
+
+ StringRef Data = getTok().getIdentifier();
+
+ Lex();
+
+ const MCSection *Comment =
+ getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
+ SectionKind::getReadOnly(),
+ 1, "");
+
+ static bool First = true;
+
+ getStreamer().PushSection();
+ getStreamer().SwitchSection(Comment);
+ if (First)
+ getStreamer().EmitIntValue(0, 1);
+ First = false;
+ getStreamer().EmitBytes(Data, 0);
+ getStreamer().EmitIntValue(0, 1);
+ getStreamer().PopSection();
+ return false;
+}
+
+/// ParseDirectiveSymver
+/// ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (AliasName.find('@') == StringRef::npos)
+ return TokError("expected a '@' in the name");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+ getStreamer().EmitAssignment(Alias, Value);
+ return false;
+}
+
+/// ParseDirectiveWeakref
+/// ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+ // FIXME: Share code with the other alias building directives.
+
+ StringRef AliasName;
+ if (getParser().ParseIdentifier(AliasName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected a comma");
+
+ Lex();
+
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ getStreamer().EmitWeakReference(Alias, Sym);
return false;
}
diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp
new file mode 100644
index 0000000..6098e6b
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp
@@ -0,0 +1,234 @@
+//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectStreamer.h"
+// FIXME: Remove this.
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCPureStreamer : public MCObjectStreamer {
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+public:
+ MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+ virtual void Finish();
+

+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitThumbFunc(MCSymbol *Func) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EndCOFFSymbolDef() {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual void EmitFileDirective(StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ }
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ report_fatal_error("unsupported directive in pure streamer");
+ return false;
+ }
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCPureStreamer::InitSections() {
+ // FIXME: To what!?
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ 0, SectionKind::getText()));
+
+}
+
+void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ Symbol->setSection(*getCurrentSection());
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // We have to create a new fragment if this is an atom-defining symbol;
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ new MCDataFragment(getCurrentSectionData());
+
+ // FIXME: This is wasteful; we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists; otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+}
+
+void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ report_fatal_error("not yet implemented in pure streamer");
+}
+
+void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::Finish() {
+ // FIXME: Handle DWARF tables?
+
+ this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE) {
+ return new MCPureStreamer(Context, TAB, OS, CE);
+}
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
index eb53160..90091f0 100644
--- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -74,3 +74,11 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
}
}
}
+
+bool MCSectionCOFF::UseCodeAlign() const {
+ return getKind().isText();
+}
+
+bool MCSectionCOFF::isVirtualSection() const {
+ return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+}
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index a7599de..d32aea1 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -11,7 +11,9 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
MCSectionELF::~MCSectionELF() {} // anchor.
@@ -29,14 +31,6 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
return false;
}
-// ShouldPrintSectionType - Only prints the section type if supported
-bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const {
- if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS))
- return false;
-
- return true;
-}
-
void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const {
@@ -49,87 +43,88 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
// Handle the weird solaris syntax if desired.
if (MAI.usesSunStyleELFSectionSwitchSyntax() &&
- !(Flags & MCSectionELF::SHF_MERGE)) {
- if (Flags & MCSectionELF::SHF_ALLOC)
+ !(Flags & ELF::SHF_MERGE)) {
+ if (Flags & ELF::SHF_ALLOC)
OS << ",#alloc";
- if (Flags & MCSectionELF::SHF_EXECINSTR)
+ if (Flags & ELF::SHF_EXECINSTR)
OS << ",#execinstr";
- if (Flags & MCSectionELF::SHF_WRITE)
+ if (Flags & ELF::SHF_WRITE)
OS << ",#write";
- if (Flags & MCSectionELF::SHF_TLS)
+ if (Flags & ELF::SHF_TLS)
OS << ",#tls";
OS << '\n';
return;
}
OS << ",\"";
- if (Flags & MCSectionELF::SHF_ALLOC)
+ if (Flags & ELF::SHF_ALLOC)
OS << 'a';
- if (Flags & MCSectionELF::SHF_EXECINSTR)
+ if (Flags & ELF::SHF_EXECINSTR)
OS << 'x';
- if (Flags & MCSectionELF::SHF_WRITE)
+ if (Flags & ELF::SHF_GROUP)
+ OS << 'G';
+ if (Flags & ELF::SHF_WRITE)
OS << 'w';
- if (Flags & MCSectionELF::SHF_MERGE)
+ if (Flags & ELF::SHF_MERGE)
OS << 'M';
- if (Flags & MCSectionELF::SHF_STRINGS)
+ if (Flags & ELF::SHF_STRINGS)
OS << 'S';
- if (Flags & MCSectionELF::SHF_TLS)
+ if (Flags & ELF::SHF_TLS)
OS << 'T';
// If there are target-specific flags, print them.
- if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION)
+ if (Flags & ELF::XCORE_SHF_CP_SECTION)
OS << 'c';
- if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION)
+ if (Flags & ELF::XCORE_SHF_DP_SECTION)
OS << 'd';
OS << '"';
- if (ShouldPrintSectionType(Type)) {
- OS << ',';
-
- // If comment string is '@', e.g. as on ARM - use '%' instead
- if (MAI.getCommentString()[0] == '@')
- OS << '%';
- else
- OS << '@';
-
- if (Type == MCSectionELF::SHT_INIT_ARRAY)
- OS << "init_array";
- else if (Type == MCSectionELF::SHT_FINI_ARRAY)
- OS << "fini_array";
- else if (Type == MCSectionELF::SHT_PREINIT_ARRAY)
- OS << "preinit_array";
- else if (Type == MCSectionELF::SHT_NOBITS)
- OS << "nobits";
- else if (Type == MCSectionELF::SHT_PROGBITS)
- OS << "progbits";
-
- if (getKind().isMergeable1ByteCString()) {
- OS << ",1";
- } else if (getKind().isMergeable2ByteCString()) {
- OS << ",2";
- } else if (getKind().isMergeable4ByteCString() ||
- getKind().isMergeableConst4()) {
- OS << ",4";
- } else if (getKind().isMergeableConst8()) {
- OS << ",8";
- } else if (getKind().isMergeableConst16()) {
- OS << ",16";
- }
+ OS << ',';
+
+ // If the comment string is '@', e.g. as on ARM, use '%' instead
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ if (Type == ELF::SHT_INIT_ARRAY)
+ OS << "init_array";
+ else if (Type == ELF::SHT_FINI_ARRAY)
+ OS << "fini_array";
+ else if (Type == ELF::SHT_PREINIT_ARRAY)
+ OS << "preinit_array";
+ else if (Type == ELF::SHT_NOBITS)
+ OS << "nobits";
+ else if (Type == ELF::SHT_NOTE)
+ OS << "note";
+ else if (Type == ELF::SHT_PROGBITS)
+ OS << "progbits";
+
+ if (EntrySize) {
+ assert(Flags & ELF::SHF_MERGE);
+ OS << "," << EntrySize;
}
-
+
+ if (Flags & ELF::SHF_GROUP)
+ OS << "," << Group->getName() << ",comdat";
OS << '\n';
}
-// HasCommonSymbols - True if this section holds common symbols, this is
-// indicated on the ELF object file by a symbol with SHN_COMMON section
-// header index.
-bool MCSectionELF::HasCommonSymbols() const {
-
- if (StringRef(SectionName).startswith(".gnu.linkonce."))
- return true;
-
- return false;
+bool MCSectionELF::UseCodeAlign() const {
+ return getFlags() & ELF::SHF_EXECINSTR;
}
+bool MCSectionELF::isVirtualSection() const {
+ return getType() == ELF::SHT_NOBITS;
+}
+unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) {
+ if (Kind.isMergeable1ByteCString()) return 1;
+ if (Kind.isMergeable2ByteCString()) return 2;
+ if (Kind.isMergeable4ByteCString()) return 4;
+ if (Kind.isMergeableConst4()) return 4;
+ if (Kind.isMergeableConst8()) return 8;
+ if (Kind.isMergeableConst16()) return 16;
+ return 0;
+}
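As a concrete reading of the rewritten printer (illustration only, not part of the patch), the flag letters are emitted in the fixed order shown above and DetermineEntrySize supplies the trailing entry size for mergeable sections:

    #include <string>

    // Sketch of the flag-letter mapping PrintSwitchToSection now uses
    // (generic ELF flags only; the XCore-specific 'c'/'d' letters are
    // omitted). The hex values are the standard ELF SHF_* constants.
    std::string elfFlagLetters(unsigned Flags) {
      std::string S;
      if (Flags & 0x2)   S += 'a'; // SHF_ALLOC
      if (Flags & 0x4)   S += 'x'; // SHF_EXECINSTR
      if (Flags & 0x200) S += 'G'; // SHF_GROUP
      if (Flags & 0x1)   S += 'w'; // SHF_WRITE
      if (Flags & 0x10)  S += 'M'; // SHF_MERGE
      if (Flags & 0x20)  S += 'S'; // SHF_STRINGS
      if (Flags & 0x400) S += 'T'; // SHF_TLS
      return S;
    }
    // A read-only mergeable C-string section (alloc|merge|strings) yields
    // "aMS", so the full directive looks like:
    //   .section .rodata.str1.1,"aMS",@progbits,1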
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
index ded3b20..b897c0b 100644
--- a/contrib/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
/// SectionTypeDescriptors - These are strings that describe the various section
@@ -81,18 +82,18 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
SegmentName[i] = Segment[i];
else
SegmentName[i] = 0;
-
+
if (i < Section.size())
SectionName[i] = Section[i];
else
SectionName[i] = 0;
- }
+ }
}
void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const {
OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
-
+
// Get the section type and attributes.
unsigned TAA = getTypeAndAttributes();
if (TAA == 0) {
@@ -101,7 +102,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
}
OS << ',';
-
+
unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
"Invalid SectionType specified!");
@@ -110,7 +111,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << SectionTypeDescriptors[SectionType].AssemblerName;
else
OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>";
-
+
// If we don't have any attributes, we're done.
unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
if (SectionAttrs == 0) {
@@ -128,10 +129,10 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
// Check to see if we have this attribute.
if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
continue;
-
+
// Yep, clear it and print it.
SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
-
+
OS << Separator;
if (SectionAttrDescriptors[i].AssemblerName)
OS << SectionAttrDescriptors[i].AssemblerName;
@@ -139,15 +140,25 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
Separator = '+';
}
-
+
assert(SectionAttrs == 0 && "Unknown section attributes!");
-
+
// If we have a S_SYMBOL_STUBS size specified, print it.
if (Reserved2 != 0)
OS << ',' << Reserved2;
OS << '\n';
}
+bool MCSectionMachO::UseCodeAlign() const {
+ return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+ return (getType() == MCSectionMachO::S_ZEROFILL ||
+ getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
/// StripSpaces - This removes leading and trailing spaces from the StringRef.
static void StripSpaces(StringRef &Str) {
while (!Str.empty() && isspace(Str[0]))
@@ -168,12 +179,12 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
unsigned &StubSize) { // Out.
// Find the first comma.
std::pair<StringRef, StringRef> Comma = Spec.split(',');
-
+
// If there is no comma, we fail.
if (Comma.second.empty())
return "mach-o section specifier requires a segment and section "
"separated by a comma";
-
+
// Capture segment, remove leading and trailing whitespace.
Segment = Comma.first;
StripSpaces(Segment);
@@ -182,14 +193,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
if (Segment.empty() || Segment.size() > 16)
return "mach-o section specifier requires a segment whose length is "
"between 1 and 16 characters";
-
+
// Split the section name off from any attributes if present.
Comma = Comma.second.split(',');
// Capture section, remove leading and trailing whitespace.
Section = Comma.first;
StripSpaces(Section);
-
+
// Verify that the section is present and not too long.
if (Section.empty() || Section.size() > 16)
return "mach-o section specifier requires a section whose length is "
@@ -200,25 +211,25 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
StubSize = 0;
if (Comma.second.empty())
return "";
-
+
// Otherwise, we need to parse the section type and attributes.
Comma = Comma.second.split(',');
-
+
// Get the section type.
StringRef SectionType = Comma.first;
StripSpaces(SectionType);
-
+
// Figure out which section type it is.
unsigned TypeID;
for (TypeID = 0; TypeID !=MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
if (SectionTypeDescriptors[TypeID].AssemblerName &&
SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
break;
-
+
// If we didn't find the section type, reject it.
if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
return "mach-o section specifier uses an unknown section type";
-
+
// Remember the TypeID.
TAA = TypeID;
@@ -235,10 +246,10 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
// present.
Comma = Comma.second.split(',');
StringRef Attrs = Comma.first;
-
+
// The attribute list is a '+' separated list of attributes.
std::pair<StringRef, StringRef> Plus = Attrs.split('+');
-
+
while (1) {
StringRef Attr = Plus.first;
StripSpaces(Attr);
@@ -247,14 +258,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
for (unsigned i = 0; ; ++i) {
if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
return "mach-o section specifier has invalid attribute";
-
+
if (SectionAttrDescriptors[i].AssemblerName &&
Attr == SectionAttrDescriptors[i].AssemblerName) {
TAA |= SectionAttrDescriptors[i].AttrFlag;
break;
}
}
-
+
if (Plus.second.empty()) break;
Plus = Plus.second.split('+');
};
@@ -272,15 +283,14 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
return "mach-o section specifier cannot have a stub size specified because "
"it does not have type 'symbol_stubs'";
-
+
// Okay, if we do, it must be a number.
StringRef StubSizeStr = Comma.second;
StripSpaces(StubSizeStr);
-
+
// Convert the stub size from a string to an integer.
if (StubSizeStr.getAsInteger(0, StubSize))
return "mach-o section specifier has a malformed stub size";
-
+
return "";
}
-
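For reference (an example, not from the patch), a full Mach-O section specifier such as __TEXT,__cstring,cstring_literals splits into segment "__TEXT", section "__cstring", a TAA whose type is S_CSTRING_LITERALS, and a stub size of 0; the whitespace stripping and the 16-character limits above apply to the first two fields. A minimal sketch of the repeated split-at-first-comma step:

    #include <string>
    #include <utility>

    // Sketch of the split the parser performs with StringRef::split(',').
    std::pair<std::string, std::string> splitOnce(const std::string &S) {
      std::string::size_type P = S.find(',');
      if (P == std::string::npos)
        return {S, ""};            // no comma: second half is empty
      return {S.substr(0, P), S.substr(P + 1)};
    }
    // splitOnce("__TEXT,__cstring,cstring_literals")
    //   -> {"__TEXT", "__cstring,cstring_literals"}; splitting the tail again
    //      isolates the section name and the type/attribute suffix.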
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 3e9d02e..3dcdba1 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -7,16 +7,21 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
using namespace llvm;
-MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0),
- PrevSection(0) {
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) {
+ PrevSectionStack.push_back(NULL);
+ CurSectionStack.push_back(NULL);
}
MCStreamer::~MCStreamer() {
@@ -27,17 +32,90 @@ raw_ostream &MCStreamer::GetCommentOS() {
return nulls();
}
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+ const MCSymbol *Label, int PointerSize) {
+ // emit the sequence to set the address
+ EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+ EmitULEB128IntValue(PointerSize + 1);
+ EmitIntValue(dwarf::DW_LNE_set_address, 1);
+ EmitSymbolValue(Label, PointerSize);
+
+ // emit the sequence for the LineDelta (from 1) and a zero address delta.
+ MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
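As a worked example (illustrative, assuming PointerSize == 8): the sequence above emits the extended-opcode escape 0x00, a ULEB128 length of 9 (one sub-opcode byte plus 8 address bytes), the DW_LNE_set_address sub-opcode 0x02, and the 8-byte address, before MCDwarfLineAddr::Emit encodes LineDelta with an address delta of zero. A standalone byte-level sketch, where the literal address bytes stand in for the symbol value that is really emitted via a fixup:

    #include <cstdint>
    #include <vector>

    // Sketch of the byte stream EmitDwarfSetLineAddr produces for an
    // 8-byte address; the opcode values are the standard DWARF constants.
    std::vector<uint8_t> setAddressOpcode(uint64_t Addr) {
      std::vector<uint8_t> B;
      B.push_back(0x00);  // DW_LNS_extended_op escape
      B.push_back(9);     // ULEB128 length: 1 sub-opcode byte + 8 address bytes
      B.push_back(0x02);  // DW_LNE_set_address
      for (int i = 0; i < 8; ++i)
        B.push_back(uint8_t(Addr >> (i * 8)));  // little-endian address bytes
      return B;
    }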
/// EmitIntValue - Special case of EmitValue that avoids the client having to
/// pass in a MCExpr for constant integers.
void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
unsigned AddrSpace) {
- EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+ assert(Size <= 8 && "Invalid size");
+ assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+ "Invalid value for the given size");
+ char buf[8];
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ buf[i] = uint8_t(Value >> (i * 8));
+ EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
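A quick check of the new little-endian byte splitting (illustration only): EmitIntValue(0x11223344, 4) writes the bytes 44 33 22 11. A standalone sketch of the same loop:

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Sketch of the loop EmitIntValue now uses: least significant byte first.
    std::string toLittleEndianBytes(uint64_t Value, unsigned Size) {
      assert(Size <= 8 && "Invalid size");
      std::string Buf;
      for (unsigned i = 0; i != Size; ++i)
        Buf.push_back(char(uint8_t(Value >> (i * 8))));
      return Buf;
    }
    // toLittleEndianBytes(0x11223344, 4) == "\x44\x33\x22\x11"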
+/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeULEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in a MCExpr for constant integers.
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
+ SmallString<32> Tmp;
+ raw_svector_ostream OSE(Tmp);
+ MCObjectWriter::EncodeSLEB128(Value, OSE);
+ EmitBytes(OSE.str(), AddrSpace);
+}
+
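For reference, a worked encoding (not from the patch): ULEB128(300) is the two bytes 0xAC 0x02, and SLEB128(-2) is the single byte 0x7E. A minimal standalone ULEB128 encoder matching what MCObjectWriter::EncodeULEB128 is expected to produce:

    #include <cstdint>
    #include <vector>

    // Sketch of ULEB128: emit 7 bits at a time, low bits first, setting the
    // high bit on every byte except the last.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;  // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }
    // encodeULEB128(300) == {0xAC, 0x02}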
+void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ if (getContext().getAsmInfo().hasAggressiveSymbolFolding()) {
+ EmitValue(Value, Size, AddrSpace);
+ return;
+ }
+ MCSymbol *ABS = getContext().CreateTempSymbol();
+ EmitAssignment(ABS, Value);
+ EmitSymbolValue(ABS, Size, AddrSpace);
+}
+
+
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(Value, Size, false, AddrSpace);
+}
+
+void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ EmitValueImpl(Value, Size, true, AddrSpace);
+}
+
+void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+ bool isPCRel, unsigned AddrSpace) {
+ EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel,
+ AddrSpace);
}
void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
unsigned AddrSpace) {
- EmitValue(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace);
+ EmitSymbolValue(Sym, Size, false, AddrSpace);
+}
+
+void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
+ unsigned AddrSpace) {
+ EmitSymbolValue(Sym, Size, true, AddrSpace);
+}
+
+void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
}
/// EmitFill - Emit NumBytes bytes worth of the value specified by
@@ -49,6 +127,138 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
EmitValue(E, 1, AddrSpace);
}
+bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename) {
+ return getContext().GetDwarfFile(Filename, FileNo) == 0;
+}
+
+void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+ unsigned Column, unsigned Flags,
+ unsigned Isa,
+ unsigned Discriminator) {
+ getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa,
+ Discriminator);
+}
+
+MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
+ if (FrameInfos.empty())
+ return NULL;
+ return &FrameInfos.back();
+}
+
+void MCStreamer::EnsureValidFrame() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (!CurFrame || CurFrame->End)
+ report_fatal_error("No open frame");
+}
+
+bool MCStreamer::EmitCFIStartProc() {
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ if (CurFrame && !CurFrame->End) {
+ report_fatal_error("Starting a frame before finishing the previous one!");
+ return true;
+ }
+ MCDwarfFrameInfo Frame;
+ Frame.Begin = getContext().CreateTempSymbol();
+ EmitLabel(Frame.Begin);
+ FrameInfos.push_back(Frame);
+ return false;
+}
+
+bool MCStreamer::EmitCFIEndProc() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->End = getContext().CreateTempSymbol();
+ EmitLabel(CurFrame->End);
+ return false;
+}
+
+bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(Register, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
+bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(MachineLocation::VirtualFP);
+ MachineLocation Source(MachineLocation::VirtualFP, -Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
+bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register);
+ MachineLocation Source(MachineLocation::VirtualFP);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
+bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MachineLocation Dest(Register, Offset);
+ MachineLocation Source(Register, Offset);
+ MCCFIInstruction Instruction(Label, Dest, Source);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
+bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym,
+ unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Personality = Sym;
+ CurFrame->PersonalityEncoding = Encoding;
+ return false;
+}
+
+bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->Lsda = Sym;
+ CurFrame->LsdaEncoding = Encoding;
+ return false;
+}
+
+bool MCStreamer::EmitCFIRememberState() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
+bool MCStreamer::EmitCFIRestoreState() {
+ // FIXME: Error if there is no matching cfi_remember_state.
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+ CurFrame->Instructions.push_back(Instruction);
+ return false;
+}
+
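These helpers back the assembler's .cfi_* directives; each one drops a temporary label and an MCCFIInstruction into the current MCDwarfFrameInfo. Purely as an illustration of the intended call order (a sketch against the interface added above, not code from the patch), a caller emitting a typical x86-64 prologue might do:

    #include "llvm/MC/MCStreamer.h"

    // Hypothetical usage sketch; the register numbers are the DWARF numbers
    // for x86-64 (7 = rsp, 6 = rbp) and are assumptions for illustration.
    void emitPrologueCFI(llvm::MCStreamer &S) {
      S.EmitCFIStartProc();        // opens a new MCDwarfFrameInfo
      S.EmitCFIDefCfaOffset(16);   // after "push %rbp": CFA = rsp + 16
      S.EmitCFIOffset(6, -16);     // rbp saved at CFA - 16
      S.EmitCFIDefCfaRegister(6);  // after "mov %rsp, %rbp": CFA follows rbp
      // ... function body, then at the end of the function:
      S.EmitCFIEndProc();          // closes the frame
    }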
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
index 07751f7..1c71f26 100644
--- a/contrib/llvm/lib/MC/MCSymbol.cpp
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -39,7 +39,20 @@ static bool NameNeedsQuoting(StringRef Str) {
return false;
}
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+ const MCSymbol *S = this;
+ while (S->isVariable()) {
+ const MCExpr *Value = S->getVariableValue();
+ if (Value->getKind() != MCExpr::SymbolRef)
+ return *S;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Value);
+ S = &Ref->getSymbol();
+ }
+ return *S;
+}
+
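AliasedSymbol follows a chain of variable symbols whose values are plain symbol references and returns the final symbol (the first one that is not a variable, or whose value is not a bare reference). A small standalone sketch of the same walk, modelling aliases as a map, purely for illustration:

    #include <map>
    #include <string>

    // Sketch: resolve "a = b", "b = c" so that aliasedSymbol("a") == "c".
    // Names absent from the map play the role of non-variable symbols.
    // Assumes the alias chain is acyclic, as the real code does.
    std::string aliasedSymbol(const std::map<std::string, std::string> &Aliases,
                              std::string S) {
      for (auto It = Aliases.find(S); It != Aliases.end(); It = Aliases.find(S))
        S = It->second;
      return S;
    }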
void MCSymbol::setVariableValue(const MCExpr *Value) {
+ assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
"Invalid redefinition!");
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index cffabfa..8af07c7 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
@@ -18,49 +19,37 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Target/TargetAsmBackend.h"
// FIXME: Gross.
+#include "../Target/ARM/ARMFixupKinds.h"
#include "../Target/X86/X86FixupKinds.h"
#include <vector>
using namespace llvm;
+using namespace llvm::object;
+// FIXME: this has been copied from (or to) X86AsmBackend.cpp
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
- default: llvm_unreachable("invalid fixup kind!");
- case X86::reloc_pcrel_1byte:
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
case FK_Data_1: return 0;
- case X86::reloc_pcrel_2byte:
+ case FK_PCRel_2:
case FK_Data_2: return 1;
- case X86::reloc_pcrel_4byte:
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
case FK_Data_4: return 2;
case FK_Data_8: return 3;
}
}
-static bool isFixupKindPCRel(unsigned Kind) {
- switch (Kind) {
- default:
- return false;
- case X86::reloc_pcrel_1byte:
- case X86::reloc_pcrel_2byte:
- case X86::reloc_pcrel_4byte:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- return true;
- }
-}
-
-static bool isFixupKindRIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
-}
-
static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
// Undefined symbols are always extern.
if (SD->Symbol->isUndefined())
@@ -77,94 +66,7 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
namespace {
-class MachObjectWriterImpl {
- // See <mach-o/loader.h>.
- enum {
- Header_Magic32 = 0xFEEDFACE,
- Header_Magic64 = 0xFEEDFACF
- };
-
- enum {
- Header32Size = 28,
- Header64Size = 32,
- SegmentLoadCommand32Size = 56,
- SegmentLoadCommand64Size = 72,
- Section32Size = 68,
- Section64Size = 80,
- SymtabLoadCommandSize = 24,
- DysymtabLoadCommandSize = 80,
- Nlist32Size = 12,
- Nlist64Size = 16,
- RelocationInfoSize = 8
- };
-
- enum HeaderFileType {
- HFT_Object = 0x1
- };
-
- enum HeaderFlags {
- HF_SubsectionsViaSymbols = 0x2000
- };
-
- enum LoadCommandType {
- LCT_Segment = 0x1,
- LCT_Symtab = 0x2,
- LCT_Dysymtab = 0xb,
- LCT_Segment64 = 0x19
- };
-
- // See <mach-o/nlist.h>.
- enum SymbolTypeType {
- STT_Undefined = 0x00,
- STT_Absolute = 0x02,
- STT_Section = 0x0e
- };
-
- enum SymbolTypeFlags {
- // If any of these bits are set, then the entry is a stab entry number (see
- // <mach-o/stab.h>. Otherwise the other masks apply.
- STF_StabsEntryMask = 0xe0,
-
- STF_TypeMask = 0x0e,
- STF_External = 0x01,
- STF_PrivateExtern = 0x10
- };
-
- /// IndirectSymbolFlags - Flags for encoding special values in the indirect
- /// symbol entry.
- enum IndirectSymbolFlags {
- ISF_Local = 0x80000000,
- ISF_Absolute = 0x40000000
- };
-
- /// RelocationFlags - Special flags for addresses.
- enum RelocationFlags {
- RF_Scattered = 0x80000000
- };
-
- enum RelocationInfoType {
- RIT_Vanilla = 0,
- RIT_Pair = 1,
- RIT_Difference = 2,
- RIT_PreboundLazyPointer = 3,
- RIT_LocalDifference = 4,
- RIT_TLV = 5
- };
-
- /// X86_64 uses its own relocation types.
- enum RelocationInfoTypeX86_64 {
- RIT_X86_64_Unsigned = 0,
- RIT_X86_64_Signed = 1,
- RIT_X86_64_Branch = 2,
- RIT_X86_64_GOTLoad = 3,
- RIT_X86_64_GOT = 4,
- RIT_X86_64_Subtractor = 5,
- RIT_X86_64_Signed1 = 6,
- RIT_X86_64_Signed2 = 7,
- RIT_X86_64_Signed4 = 8,
- RIT_X86_64_TLV = 9
- };
-
+class MachObjectWriter : public MCObjectWriter {
/// MachSymbolData - Helper struct for containing some precomputed information
/// on symbols.
struct MachSymbolData {
@@ -179,16 +81,14 @@ class MachObjectWriterImpl {
}
};
+ /// The target specific Mach-O writer instance.
+ llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+
/// @name Relocation Data
/// @{
- struct MachRelocationEntry {
- uint32_t Word0;
- uint32_t Word1;
- };
-
llvm::DenseMap<const MCSectionData*,
- std::vector<MachRelocationEntry> > Relocations;
+ std::vector<macho::RelocationEntry> > Relocations;
llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
/// @}
@@ -202,32 +102,70 @@ class MachObjectWriterImpl {
/// @}
- MachObjectWriter *Writer;
+private:
+ /// @name Utility Methods
+ /// @{
- raw_ostream &OS;
+ bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+ (MCFixupKind) Kind);
- unsigned Is64Bit : 1;
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+ }
+
+ /// @}
+
+ SectionAddrMap SectionAddress;
+ uint64_t getSectionAddress(const MCSectionData* SD) const {
+ return SectionAddress.lookup(SD);
+ }
+ uint64_t getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(SD->getFragment()->getParent()) +
+ Layout.getSymbolOffset(SD);
+ }
+ uint64_t getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(Fragment->getParent()) +
+ Layout.getFragmentOffset(Fragment);
+ }
+
+ uint64_t getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+ unsigned Next = SD->getLayoutOrder() + 1;
+ if (Next >= Layout.getSectionOrder().size())
+ return 0;
+
+ const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+ if (NextSD.getSection().isVirtualSection())
+ return 0;
+ return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+ }
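getPaddingSize works out how many bytes separate the end of one section from the start of the next, non-virtual one. A worked example with illustrative numbers: if a section ends at address 0x103 and the following section is 8-byte aligned, OffsetToAlignment returns 5, since 0x108 is the next 8-byte boundary.

    #include <cstdint>

    // Sketch of the OffsetToAlignment computation used above.
    uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
      return (Align - (Value % Align)) % Align;
    }
    // offsetToAlignment(0x103, 8) == 5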
public:
- MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit)
- : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) {
+ MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
+ bool _IsLittleEndian)
+ : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
}
- void Write8(uint8_t Value) { Writer->Write8(Value); }
- void Write16(uint16_t Value) { Writer->Write16(Value); }
- void Write32(uint32_t Value) { Writer->Write32(Value); }
- void Write64(uint64_t Value) { Writer->Write64(Value); }
- void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
- void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
- Writer->WriteBytes(Str, ZeroFillSize);
+ /// @name Target Writer Proxy Accessors
+ /// @{
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool isARM() const {
+ uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
+ return CPUType == mach::CTM_ARM;
}
+ /// @}
+
void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
bool SubsectionsViaSymbols) {
uint32_t Flags = 0;
if (SubsectionsViaSymbols)
- Flags |= HF_SubsectionsViaSymbols;
+ Flags |= macho::HF_SubsectionsViaSymbols;
// struct mach_header (28 bytes) or
// struct mach_header_64 (32 bytes)
@@ -235,21 +173,20 @@ public:
uint64_t Start = OS.tell();
(void) Start;
- Write32(Is64Bit ? Header_Magic64 : Header_Magic32);
+ Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+ Write32(TargetObjectWriter->getCPUType());
+ Write32(TargetObjectWriter->getCPUSubtype());
- // FIXME: Support cputype.
- Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386);
- // FIXME: Support cpusubtype.
- Write32(MachO::CPUSubType_I386_ALL);
- Write32(HFT_Object);
- Write32(NumLoadCommands); // Object files have a single load command, the
- // segment.
+ Write32(macho::HFT_Object);
+ Write32(NumLoadCommands);
Write32(LoadCommandsSize);
Write32(Flags);
- if (Is64Bit)
+ if (is64Bit())
Write32(0); // reserved
- assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size);
+ assert(OS.tell() - Start ==
+ (is64Bit() ? macho::Header64Size : macho::Header32Size));
}
/// WriteSegmentLoadCommand - Write a segment load command.
@@ -266,14 +203,16 @@ public:
uint64_t Start = OS.tell();
(void) Start;
- unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size :
- SegmentLoadCommand32Size;
- Write32(Is64Bit ? LCT_Segment64 : LCT_Segment);
+ unsigned SegmentLoadCommandSize =
+ is64Bit() ? macho::SegmentLoadCommand64Size:
+ macho::SegmentLoadCommand32Size;
+ Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
Write32(SegmentLoadCommandSize +
- NumSections * (Is64Bit ? Section64Size : Section32Size));
+ NumSections * (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
WriteBytes("", 16);
- if (Is64Bit) {
+ if (is64Bit()) {
Write64(0); // vmaddr
Write64(VMSize); // vmsize
Write64(SectionDataStartOffset); // file offset
@@ -295,10 +234,10 @@ public:
void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCSectionData &SD, uint64_t FileOffset,
uint64_t RelocationsStart, unsigned NumRelocations) {
- uint64_t SectionSize = Layout.getSectionSize(&SD);
+ uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
// The offset is unused for virtual sections.
- if (Asm.getBackend().isVirtualSection(SD.getSection())) {
+ if (SD.getSection().isVirtualSection()) {
assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
FileOffset = 0;
}
@@ -312,11 +251,11 @@ public:
const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
WriteBytes(Section.getSectionName(), 16);
WriteBytes(Section.getSegmentName(), 16);
- if (Is64Bit) {
- Write64(Layout.getSectionAddress(&SD)); // address
+ if (is64Bit()) {
+ Write64(getSectionAddress(&SD)); // address
Write64(SectionSize); // size
} else {
- Write32(Layout.getSectionAddress(&SD)); // address
+ Write32(getSectionAddress(&SD)); // address
Write32(SectionSize); // size
}
Write32(FileOffset);
@@ -332,10 +271,11 @@ public:
Write32(Flags);
Write32(IndirectSymBase.lookup(&SD)); // reserved1
Write32(Section.getStubSize()); // reserved2
- if (Is64Bit)
+ if (is64Bit())
Write32(0); // reserved3
- assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size);
+ assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
}
void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
@@ -346,14 +286,14 @@ public:
uint64_t Start = OS.tell();
(void) Start;
- Write32(LCT_Symtab);
- Write32(SymtabLoadCommandSize);
+ Write32(macho::LCT_Symtab);
+ Write32(macho::SymtabLoadCommandSize);
Write32(SymbolOffset);
Write32(NumSymbols);
Write32(StringTableOffset);
Write32(StringTableSize);
- assert(OS.tell() - Start == SymtabLoadCommandSize);
+ assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
}
void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
@@ -369,8 +309,8 @@ public:
uint64_t Start = OS.tell();
(void) Start;
- Write32(LCT_Dysymtab);
- Write32(DysymtabLoadCommandSize);
+ Write32(macho::LCT_Dysymtab);
+ Write32(macho::DysymtabLoadCommandSize);
Write32(FirstLocalSymbol);
Write32(NumLocalSymbols);
Write32(FirstExternalSymbol);
@@ -390,7 +330,7 @@ public:
Write32(0); // locreloff
Write32(0); // nlocrel
- assert(OS.tell() - Start == DysymtabLoadCommandSize);
+ assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
}
void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
@@ -404,27 +344,27 @@ public:
//
// FIXME: Are the prebound or indirect fields possible here?
if (Symbol.isUndefined())
- Type = STT_Undefined;
+ Type = macho::STT_Undefined;
else if (Symbol.isAbsolute())
- Type = STT_Absolute;
+ Type = macho::STT_Absolute;
else
- Type = STT_Section;
+ Type = macho::STT_Section;
// FIXME: Set STAB bits.
if (Data.isPrivateExtern())
- Type |= STF_PrivateExtern;
+ Type |= macho::STF_PrivateExtern;
// Set external bit.
if (Data.isExternal() || Symbol.isUndefined())
- Type |= STF_External;
+ Type |= macho::STF_External;
// Compute the symbol address.
if (Symbol.isDefined()) {
if (Symbol.isAbsolute()) {
Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
} else {
- Address = Layout.getSymbolAddress(&Data);
+ Address = getSymbolAddress(&Data, Layout);
}
} else if (Data.isCommon()) {
// Common symbols are encoded with the size in the address
@@ -452,7 +392,7 @@ public:
// The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// value.
Write16(Flags);
- if (Is64Bit)
+ if (is64Bit())
Write64(Address);
else
Write32(Address);
@@ -472,11 +412,15 @@ public:
// - Input errors, where something cannot be correctly encoded. 'as' allows
// these through in many cases.
+ static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }
void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -484,7 +428,7 @@ public:
uint32_t FixupOffset =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
uint32_t FixupAddress =
- Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
+ getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
int64_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
@@ -503,7 +447,7 @@ public:
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
- Type = RIT_X86_64_Unsigned;
+ Type = macho::RIT_X86_64_Unsigned;
Index = 0;
// FIXME: I believe this is broken, I don't think the linker can
@@ -513,16 +457,16 @@ public:
// yet).
if (IsPCRel) {
IsExtern = 1;
- Type = RIT_X86_64_Branch;
+ Type = macho::RIT_X86_64_Branch;
}
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
@@ -534,25 +478,35 @@ public:
if (IsPCRel)
report_fatal_error("unsupported pc-relative relocation of difference");
- // We don't currently support any situation where one or both of the
- // symbols would require a local relocation. This is almost certainly
- // unused and may not be possible to encode correctly.
- if (!A_Base || !B_Base)
- report_fatal_error("unsupported local relocations in difference");
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where
+ // the section has only temporary symbols and thus the symbols don't have
+ // base symbols. This is encoded using the section ordinal and
+ // non-extern relocation entries.
// Darwin 'as' doesn't emit correct relocations for this (it ends up with
- // a single SIGNED relocation); reject it for now.
- if (A_Base == B_Base)
+ // a single SIGNED relocation); reject it for now. Except the case where
+ // both symbols don't have a base, equal but both NULL.
+ if (A_Base == B_Base && A_Base)
report_fatal_error("unsupported relocation with identical base");
- Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
- Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);
+ Value += getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
+ Value -= getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
- Index = A_Base->getIndex();
- IsExtern = 1;
- Type = RIT_X86_64_Unsigned;
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
- MachRelocationEntry MRE;
+ macho::RelocationEntry MRE;
MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
@@ -561,13 +515,19 @@ public:
(Type << 28));
Relocations[Fragment->getParent()].push_back(MRE);
- Index = B_Base->getIndex();
- IsExtern = 1;
- Type = RIT_X86_64_Subtractor;
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
// Relocations inside debug sections always use local relocations when
// possible. This seems to be done because the debugger doesn't fully
@@ -589,15 +549,26 @@ public:
// Add the local offset, if needed.
if (Base != &SD)
- Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
} else if (Symbol->isInSection()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
- Value += Layout.getSymbolAddress(&SD);
+ Value += getSymbolAddress(&SD, Layout);
if (IsPCRel)
Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
} else {
report_fatal_error("unsupported relocation of undefined symbol '" +
Symbol->getName() + "'");
@@ -611,15 +582,15 @@ public:
// rewrite the movq to an leaq at link time if the symbol ends up in
// the same linkage unit.
if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
- Type = RIT_X86_64_GOTLoad;
+ Type = macho::RIT_X86_64_GOTLoad;
else
- Type = RIT_X86_64_GOT;
+ Type = macho::RIT_X86_64_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- Type = RIT_X86_64_TLV;
+ Type = macho::RIT_X86_64_TLV;
} else if (Modifier != MCSymbolRefExpr::VK_None) {
report_fatal_error("unsupported symbol modifier in relocation");
} else {
- Type = RIT_X86_64_Signed;
+ Type = macho::RIT_X86_64_Signed;
// The Darwin x86_64 relocation format has a problem where it cannot
// encode an address (L<foo> + <constant>) which is outside the atom
@@ -636,9 +607,9 @@ public:
// (the additional bias), but instead appear to just look at the
// final offset.
switch (-(Target.getConstant() + (1LL << Log2Size))) {
- case 1: Type = RIT_X86_64_Signed1; break;
- case 2: Type = RIT_X86_64_Signed2; break;
- case 4: Type = RIT_X86_64_Signed4; break;
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
}
}
} else {
@@ -646,24 +617,24 @@ public:
report_fatal_error("unsupported symbol modifier in branch "
"relocation");
- Type = RIT_X86_64_Branch;
+ Type = macho::RIT_X86_64_Branch;
}
} else {
if (Modifier == MCSymbolRefExpr::VK_GOT) {
- Type = RIT_X86_64_GOT;
+ Type = macho::RIT_X86_64_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
// GOTPCREL is allowed as a modifier on non-PCrel instructions, in
// which case all we do is set the PCrel bit in the relocation entry;
// this is used with exception handling, for example. The source is
// required to include any necessary offset directly.
- Type = RIT_X86_64_GOT;
+ Type = macho::RIT_X86_64_GOT;
IsPCRel = 1;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
report_fatal_error("TLVP symbol modifier should have been rip-rel");
} else if (Modifier != MCSymbolRefExpr::VK_None)
report_fatal_error("unsupported symbol modifier in relocation");
else
- Type = RIT_X86_64_Unsigned;
+ Type = macho::RIT_X86_64_Unsigned;
}
}
@@ -671,7 +642,7 @@ public:
FixedValue = Value;
// struct relocation_info (8 bytes)
- MachRelocationEntry MRE;
+ macho::RelocationEntry MRE;
MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
@@ -685,11 +656,11 @@ public:
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
- unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
- unsigned Type = RIT_Vanilla;
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -699,7 +670,9 @@ public:
report_fatal_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = Layout.getSymbolAddress(A_SD);
+ uint32_t Value = getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
@@ -714,28 +687,184 @@ public:
// Note that there is no longer any semantic difference between these two
// relocation types from the linkers point of view, this is done solely
// for pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
- Value2 = Layout.getSymbolAddress(B_SD);
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = getSymbolAddress(B_SD, Layout);
+ FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
+ void RecordARMScatteredRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = getSymbolAddress(B_SD, Layout);
+ FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == RIT_Difference || Type == RIT_LocalDifference) {
- MachRelocationEntry MRE;
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
MRE.Word0 = ((0 << 0) |
- (RIT_Pair << 24) |
+ (macho::RIT_Pair << 24) |
(Log2Size << 28) |
(IsPCRel << 30) |
- RF_Scattered);
+ macho::RF_Scattered);
MRE.Word1 = Value2;
Relocations[Fragment->getParent()].push_back(MRE);
}
- MachRelocationEntry MRE;
+ macho::RelocationEntry MRE;
MRE.Word0 = ((FixupOffset << 0) |
(Type << 24) |
(Log2Size << 28) |
(IsPCRel << 30) |
- RF_Scattered);
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
+ void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = getSymbolAddress(B_SD, Layout);
+ FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+ // These two r_type relocations always have a pair following them, and
+ // the r_length bits are used differently. The encoding of the
+ // r_length is as follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ // The other half of the relocated expression is in the following pair
+ // relocation entry, in the low 16 bits of the r_address field.
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch (Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
MRE.Word1 = Value;
Relocations[Fragment->getParent()].push_back(MRE);
}
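A worked example of the "other half" packing above (numbers are illustrative): if the fully resolved value is 0x12345678, a movt fixup (MovtBit = 1) stores 0x5678, the low half that the movt instruction itself does not carry, in the pair entry, while a movw fixup stores the high half 0x1234; the Thumb and movt bits are then folded into the r_length bits exactly as the comment block describes.

    #include <cstdint>

    // Sketch of the "other half" selection used for ARM_RELOC_HALF pairs:
    // the pair entry carries whichever 16 bits the fixed-up instruction
    // does not encode itself.
    uint32_t otherHalf(uint32_t FixedValue, bool IsMovt) {
      return IsMovt ? (FixedValue & 0xffff)              // movt encodes the high half
                    : ((FixedValue & 0xffff0000) >> 16); // movw encodes the low half
    }
    // otherHalf(0x12345678, /*IsMovt=*/true)  == 0x5678
    // otherHalf(0x12345678, /*IsMovt=*/false) == 0x1234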
@@ -746,7 +875,7 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
- !Is64Bit &&
+ !is64Bit() &&
"Should only be called with a 32-bit TLVP relocation!");
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
@@ -764,50 +893,218 @@ public:
if (Target.getSymB()) {
// If this is a subtraction then we're pcrel.
uint32_t FixupAddress =
- Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
+ getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
IsPCRel = 1;
- FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
+ FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
Target.getConstant());
FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
}
-
+
// struct relocation_info (8 bytes)
- MachRelocationEntry MRE;
+ macho::RelocationEntry MRE;
MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
+ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_arm_thumb_bl:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ }
+ }
+ void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ report_fatal_error("unknown ARM fixup kind!");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need
+ // a scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half ||
+ RelocType == macho::RIT_ARM_HalfDifference)
+ return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+ return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation
+ // with an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, SectionAddress)) {
+ FixedValue = Res;
+ return;
+ }
+
+ report_fatal_error("unsupported relocation of variable '" +
+ SD->getSymbol().getName() + "'");
+ } else {
+ // Check whether we need an external or internal relocation.
+ if (doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+ FixedValue += getSectionAddress(SD->getFragment()->getParent());
+ }
+ if (IsPCRel)
+ FixedValue -= getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
- (1 << 27) | // Extern
- (RIT_TLV << 28)); // Type
+ (IsExtern << 27) |
+ (Type << 28));
Relocations[Fragment->getParent()].push_back(MRE);
}
-
+
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) {
- if (Is64Bit) {
+    // FIXME: This needs to be factored into the target Mach-O writer.
+ if (isARM()) {
+ RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+ return;
+ }
+ if (is64Bit()) {
RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
return;
}
- unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// If this is a 32-bit TLVP reloc it's handled a bit differently.
- if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
return;
}
-
+
// If this is a difference or a defined symbol plus an offset, then we need
// a scattered relocation entry.
// Differences always require scattered relocations.
if (Target.getSymB())
return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ Target, Log2Size, FixedValue);
// Get the symbol data, if any.
MCSymbolData *SD = 0;
@@ -821,7 +1118,7 @@ public:
Offset += 1 << Log2Size;
if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ Target, Log2Size, FixedValue);
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
@@ -834,7 +1131,17 @@ public:
//
// FIXME: Currently, these are never generated (see code below). I cannot
// find a case where they are actually emitted.
- Type = RIT_Vanilla;
+ Type = macho::RIT_Vanilla;
+ } else if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, SectionAddress)) {
+ FixedValue = Res;
+ return;
+ }
+
+ report_fatal_error("unsupported relocation of variable '" +
+ SD->getSymbol().getName() + "'");
} else {
// Check whether we need an external or internal relocation.
if (doesSymbolRequireExternRelocation(SD)) {
@@ -844,17 +1151,20 @@ public:
// compensate for the addend of the symbol address, if it was
// undefined. This occurs with weak definitions, for example.
if (!SD->Symbol->isUndefined())
- FixedValue -= Layout.getSymbolAddress(SD);
+ FixedValue -= Layout.getSymbolOffset(SD);
} else {
// The index is the section ordinal (1-based).
Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+ FixedValue += getSectionAddress(SD->getFragment()->getParent());
}
+ if (IsPCRel)
+ FixedValue -= getSectionAddress(Fragment->getParent());
- Type = RIT_Vanilla;
+ Type = macho::RIT_Vanilla;
}
// struct relocation_info (8 bytes)
- MachRelocationEntry MRE;
+ macho::RelocationEntry MRE;
MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
@@ -885,7 +1195,7 @@ public:
// Initialize the section indirect symbol base, if necessary.
if (!IndirectSymBase.count(it->SectionData))
IndirectSymBase[it->SectionData] = IndirectIndex;
-
+
Asm.getOrCreateSymbolData(*it->Symbol);
}
@@ -1028,7 +1338,25 @@ public:
StringTable += '\x00';
}
- void ExecutePostLayoutBinding(MCAssembler &Asm) {
+ void computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartAddress = 0;
+ const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+    for (int i = 0, n = Order.size(); i != n; ++i) {
+ const MCSectionData *SD = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ SectionAddress[SD] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(SD);
+ // Explicitly pad the section to match the alignment requirements of the
+      // following one. This is for 'gas' compatibility; it shouldn't
+      // strictly be necessary.
+ StartAddress += getPaddingSize(SD, Layout);
+ }
+ }
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
+ computeSectionAddresses(Asm, Layout);
+
// Create symbol data for any indirect symbols.
BindIndirectSymbols(Asm);
@@ -1037,41 +1365,101 @@ public:
UndefinedSymbolData);
}
- void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) {
+ virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (InSet)
+ return true;
+
+ // The effective address is
+ // addr(atom(A)) + offset(A)
+ // - addr(atom(B)) - offset(B)
+ // and the offsets are not relocatable, so the fixup is fully resolved when
+ // addr(atom(A)) - addr(atom(B)) == 0.
+ const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+ const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSection &SecA = SA.getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+
+ if (IsPCRel) {
+ // The simple (Darwin, except on x86_64) way of dealing with this was to
+ // assume that any reference to a temporary symbol *must* be a temporary
+ // symbol in the same atom, unless the sections differ. Therefore, any
+ // PCrel relocation to a temporary symbol (in the same section) is fully
+ // resolved. This also works in conjunction with absolutized .set, which
+ // requires the compiler to use .set to absolutize the differences between
+ // symbols which the compiler knows to be assembly time constants, so we
+ // don't need to worry about considering symbol differences fully
+ // resolved.
+
+ if (!Asm.getBackend().hasReliableSymbolDifference()) {
+ if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ return false;
+ return true;
+ }
+ } else {
+ if (!TargetObjectWriter->useAggressiveSymbolFolding())
+ return false;
+ }
+
+ const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+ A_Base = FA.getAtom();
+ if (!A_Base)
+ return false;
+
+ B_Base = FB.getAtom();
+ if (!B_Base)
+ return false;
+
+ // If the atoms are the same, they are guaranteed to have the same address.
+ if (A_Base == B_Base)
+ return true;
+
+ // Otherwise, we can't prove this is fully resolved.
+ return false;
+ }
+
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
// section headers) and the symbol table.
unsigned NumLoadCommands = 1;
- uint64_t LoadCommandsSize = Is64Bit ?
- SegmentLoadCommand64Size + NumSections * Section64Size :
- SegmentLoadCommand32Size + NumSections * Section32Size;
+ uint64_t LoadCommandsSize = is64Bit() ?
+ macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+ macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
// Add the symbol table load command sizes, if used.
unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
UndefinedSymbolData.size();
if (NumSymbols) {
NumLoadCommands += 2;
- LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize;
+ LoadCommandsSize += (macho::SymtabLoadCommandSize +
+ macho::DysymtabLoadCommandSize);
}
// Compute the total size of the section data, as well as its file size and
// vm size.
- uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
- + LoadCommandsSize;
+ uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+ macho::Header32Size) + LoadCommandsSize;
uint64_t SectionDataSize = 0;
uint64_t SectionDataFileSize = 0;
uint64_t VMSize = 0;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
const MCSectionData &SD = *it;
- uint64_t Address = Layout.getSectionAddress(&SD);
- uint64_t Size = Layout.getSectionSize(&SD);
+ uint64_t Address = getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionAddressSize(&SD);
uint64_t FileSize = Layout.getSectionFileSize(&SD);
+ FileSize += getPaddingSize(&SD, Layout);
VMSize = std::max(VMSize, Address + Size);
- if (Asm.getBackend().isVirtualSection(SD.getSection()))
+ if (SD.getSection().isVirtualSection())
continue;
SectionDataSize = std::max(SectionDataSize, Address + Size);
@@ -1094,11 +1482,11 @@ public:
uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
unsigned NumRelocs = Relocs.size();
- uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it);
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
- RelocTableEnd += NumRelocs * RelocationInfoSize;
+ RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
}
// Write the symbol table load command, if used.
@@ -1124,8 +1512,8 @@ public:
// The string table is written after symbol table.
uint64_t StringTableOffset =
- SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
- Nlist32Size);
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+ macho::Nlist32Size);
WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
StringTableOffset, StringTable.size());
@@ -1137,8 +1525,13 @@ public:
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it)
- Asm.WriteSectionData(it, Layout, Writer);
+ ie = Asm.end(); it != ie; ++it) {
+ Asm.WriteSectionData(it, Layout);
+
+ uint64_t Pad = getPaddingSize(it, Layout);
+ for (unsigned int i = 0; i < Pad; ++i)
+ Write8(0);
+ }
// Write the extra padding.
WriteZeros(SectionDataPadding);
@@ -1148,7 +1541,7 @@ public:
ie = Asm.end(); it != ie; ++it) {
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
- std::vector<MachRelocationEntry> &Relocs = Relocations[it];
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
Write32(Relocs[e - i - 1].Word0);
Write32(Relocs[e - i - 1].Word1);
@@ -1169,9 +1562,9 @@ public:
// If this symbol is defined and internal, mark it as such.
if (it->Symbol->isDefined() &&
!Asm.getSymbolData(*it->Symbol).isExternal()) {
- uint32_t Flags = ISF_Local;
+ uint32_t Flags = macho::ISF_Local;
if (it->Symbol->isAbsolute())
- Flags |= ISF_Absolute;
+ Flags |= macho::ISF_Absolute;
Write32(Flags);
continue;
}
@@ -1198,32 +1591,8 @@ public:
}
-MachObjectWriter::MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian)
- : MCObjectWriter(OS, IsLittleEndian)
-{
- Impl = new MachObjectWriterImpl(this, Is64Bit);
-}
-
-MachObjectWriter::~MachObjectWriter() {
- delete (MachObjectWriterImpl*) Impl;
-}
-
-void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
- ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
-}
-
-void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
-}
-
-void MachObjectWriter::WriteObject(const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+ raw_ostream &OS,
+ bool IsLittleEndian) {
+ return new MachObjectWriter(MOTW, OS, IsLittleEndian);
}
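
The relocation_info packing used by RecordRelocation above follows the <reloc.h> bit layout: a 24-bit symbol or section index in the low bits of Word1, then one pcrel bit, two r_length bits (the log2 of the access size), one extern bit, and a four-bit type. A minimal standalone sketch of that packing, not part of the patch, with illustrative values only:

    #include <cstdint>
    #include <cstdio>

    // Pack the second word of a non-scattered Mach-O relocation entry,
    // mirroring the (Index | IsPCRel | Log2Size | IsExtern | Type) shifts
    // in RecordRelocation/RecordARMRelocation above.
    static uint32_t packWord1(uint32_t Index, bool IsPCRel, unsigned Log2Size,
                              bool IsExtern, unsigned Type) {
      return (Index << 0) |
             (uint32_t(IsPCRel) << 24) |
             (uint32_t(Log2Size) << 25) |
             (uint32_t(IsExtern) << 27) |
             (uint32_t(Type) << 28);
    }

    int main() {
      // Example: an external, pc-relative, 4-byte (Log2Size = 2) vanilla
      // relocation against symbol table index 7.
      std::printf("Word1 = 0x%08x\n", (unsigned)packWord1(7, true, 2, true, 0));
      return 0;
    }
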
diff --git a/contrib/llvm/lib/MC/TargetAsmBackend.cpp b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
index bbfddbe..1927557 100644
--- a/contrib/llvm/lib/MC/TargetAsmBackend.cpp
+++ b/contrib/llvm/lib/MC/TargetAsmBackend.cpp
@@ -10,13 +10,28 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-TargetAsmBackend::TargetAsmBackend(const Target &T)
- : TheTarget(T),
- HasAbsolutizedSet(false),
- HasReliableSymbolDifference(false),
- HasScatteredSymbols(false)
+TargetAsmBackend::TargetAsmBackend()
+ : HasReliableSymbolDifference(false)
{
}
TargetAsmBackend::~TargetAsmBackend() {
}
+
+const MCFixupKindInfo &
+TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ static const MCFixupKindInfo Builtins[] = {
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+  assert((size_t)Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
+ "Unknown fixup kind");
+ return Builtins[Kind];
+}
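
The builtin table above is indexed directly by the generic fixup kinds (FK_Data_1 through FK_PCRel_8), so the kind must stay strictly below the number of table entries. A standalone sketch of the same lookup, not part of the patch, using a simplified info struct and assuming the builtin kinds are numbered from 0 (so FK_PCRel_4 is 6):

    #include <cassert>
    #include <cstdio>

    // Simplified stand-in for MCFixupKindInfo: {name, bit offset, bit size,
    // is-pc-relative flag}, with the same rows as the table in the patch.
    struct FixupInfo { const char *Name; unsigned Offset, Bits, IsPCRel; };

    static const FixupInfo Builtins[] = {
      { "FK_Data_1",  0,  8, 0 }, { "FK_Data_2",  0, 16, 0 },
      { "FK_Data_4",  0, 32, 0 }, { "FK_Data_8",  0, 64, 0 },
      { "FK_PCRel_1", 0,  8, 1 }, { "FK_PCRel_2", 0, 16, 1 },
      { "FK_PCRel_4", 0, 32, 1 }, { "FK_PCRel_8", 0, 64, 1 },
    };

    int main() {
      unsigned Kind = 6; // FK_PCRel_4, assuming the builtin kinds start at 0
      assert(Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
             "unknown fixup kind");
      const FixupInfo &Info = Builtins[Kind];
      std::printf("%s: %u bits, pcrel=%u\n", Info.Name, Info.Bits, Info.IsPCRel);
      return 0;
    }
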
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index eeb2b96..6ca5d37 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -31,7 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/TimeValue.h"
#include "../Target/X86/X86FixupKinds.h"
@@ -55,6 +55,9 @@ struct AuxSymbol {
COFF::Auxiliary Aux;
};
+class COFFSymbol;
+class COFFSection;
+
class COFFSymbol {
public:
COFF::symbol Data;
@@ -62,15 +65,19 @@ public:
typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
name Name;
- size_t Index;
+ int Index;
AuxiliarySymbols Aux;
COFFSymbol *Other;
+ COFFSection *Section;
+ int Relocations;
MCSymbolData const *MCData;
- COFFSymbol(llvm::StringRef name, size_t index);
+ COFFSymbol(llvm::StringRef name);
size_t size() const;
void set_name_offset(uint32_t Offset);
+
+ bool should_keep() const;
};
// This class contains staging data for a COFF relocation entry.
@@ -89,12 +96,12 @@ public:
COFF::section Header;
std::string Name;
- size_t Number;
+ int Number;
MCSectionData const *MCData;
- COFFSymbol *Symb;
+ COFFSymbol *Symbol;
relocations Relocations;
- COFFSection(llvm::StringRef name, size_t Index);
+ COFFSection(llvm::StringRef name);
static size_t size();
};
@@ -118,11 +125,8 @@ public:
typedef std::vector<COFFSymbol*> symbols;
typedef std::vector<COFFSection*> sections;
- typedef StringMap<COFFSymbol *> name_symbol_map;
- typedef StringMap<COFFSection *> name_section_map;
-
- typedef DenseMap<MCSymbolData const *, COFFSymbol *> symbol_map;
- typedef DenseMap<MCSectionData const *, COFFSection *> section_map;
+ typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSection const *, COFFSection *> section_map;
// Root level file contents.
bool Is64Bit;
@@ -138,11 +142,9 @@ public:
WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
~WinCOFFObjectWriter();
- COFFSymbol *createSymbol(llvm::StringRef Name);
- COFFSection *createSection(llvm::StringRef Name);
-
- void InitCOFFEntity(COFFSymbol &Symbol);
- void InitCOFFEntity(COFFSection &Section);
+ COFFSymbol *createSymbol(StringRef Name);
+ COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
+ COFFSection *createSection(StringRef Name);
template <typename object_t, typename list_t>
object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
@@ -150,9 +152,14 @@ public:
void DefineSection(MCSectionData const &SectionData);
void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
- bool ExportSection(COFFSection *S);
+ void MakeSymbolReal(COFFSymbol &S, size_t Index);
+ void MakeSectionReal(COFFSection &S, size_t Number);
+
+ bool ExportSection(COFFSection const *S);
bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+ bool IsPhysicalSection(COFFSection *S);
+
// Entity writing methods.
void WriteFileHeader(const COFF::header &Header);
@@ -163,7 +170,7 @@ public:
// MCObjectWriter interface implementation.
- void ExecutePostLayoutBinding(MCAssembler &Asm);
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
void RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
@@ -172,7 +179,7 @@ public:
MCValue Target,
uint64_t &FixedValue);
- void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
};
}
@@ -198,9 +205,12 @@ static inline void write_uint8_le(void *Data, uint8_t const &Value) {
//------------------------------------------------------------------------------
// Symbol class implementation
-COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index)
- : Name(name.begin(), name.end()), Index(-1)
- , Other(NULL), MCData(NULL) {
+COFFSymbol::COFFSymbol(llvm::StringRef name)
+ : Name(name.begin(), name.end())
+ , Other(NULL)
+ , Section(NULL)
+ , Relocations(0)
+ , MCData(NULL) {
memset(&Data, 0, sizeof(Data));
}
@@ -216,12 +226,41 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
write_uint32_le(Data.Name + 4, Offset);
}
+/// logic to decide if the symbol should be reported in the symbol table
+bool COFFSymbol::should_keep() const {
+  // no section means it's external; keep it
+ if (Section == NULL)
+ return true;
+
+ // if it has relocations pointing at it, keep it
+ if (Relocations > 0) {
+ assert(Section->Number != -1 && "Sections with relocations must be real!");
+ return true;
+ }
+
+  // if the section it's in is being dropped, drop it
+ if (Section->Number == -1)
+ return false;
+
+ // if it is the section symbol, keep it
+ if (Section->Symbol == this)
+ return true;
+
+  // if it's temporary, drop it
+ if (MCData && MCData->getSymbol().isTemporary())
+ return false;
+
+ // otherwise, keep it
+ return true;
+}
+
//------------------------------------------------------------------------------
// Section class implementation
-COFFSection::COFFSection(llvm::StringRef name, size_t Index)
- : Name(name), Number(Index + 1)
- , MCData(NULL), Symb(NULL) {
+COFFSection::COFFSection(llvm::StringRef name)
+ : Name(name)
+ , MCData(NULL)
+ , Symbol(NULL) {
memset(&Header, 0, sizeof(Header));
}
@@ -290,43 +329,22 @@ WinCOFFObjectWriter::~WinCOFFObjectWriter() {
delete *I;
}
-COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) {
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
return createCOFFEntity<COFFSymbol>(Name, Symbols);
}
-COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
- return createCOFFEntity<COFFSection>(Name, Sections);
-}
-
-/// This function initializes a symbol by entering its name into the string
-/// table if it is too long to fit in the symbol table header.
-void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) {
- if (S.Name.size() > COFF::NameSize) {
- size_t StringTableEntry = Strings.insert(S.Name.c_str());
-
- S.set_name_offset(StringTableEntry);
- } else
- memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
+ symbol_map::iterator i = SymbolMap.find(Symbol);
+ if (i != SymbolMap.end())
+ return i->second;
+ COFFSymbol *RetSymbol
+ = createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
+ SymbolMap[Symbol] = RetSymbol;
+ return RetSymbol;
}
-/// This function initializes a section by entering its name into the string
-/// table if it is too long to fit in the section table header.
-void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
- if (S.Name.size() > COFF::NameSize) {
- size_t StringTableEntry = Strings.insert(S.Name.c_str());
-
- // FIXME: Why is this number 999999? This number is never mentioned in the
- // spec. I'm assuming this is due to the printed value needing to fit into
- // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
- // 6)? The spec does not state if this entry should be null terminated in
- // this case, and thus this seems to be the best way to do it. I think I
- // just solved my own FIXME...
- if (StringTableEntry > 999999)
- report_fatal_error("COFF string table is greater than 999999 bytes.");
-
- sprintf(S.Header.Name, "/%d", (unsigned)StringTableEntry);
- } else
- memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
}
/// A template used to lookup or create a symbol/section, and initialize it if
@@ -334,9 +352,7 @@ void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
template <typename object_t, typename list_t>
object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
list_t &List) {
- object_t *Object = new object_t(Name, List.size());
-
- InitCOFFEntity(*Object);
+ object_t *Object = new object_t(Name);
List.push_back(Object);
@@ -346,6 +362,8 @@ object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+ && "Got non COFF section in the COFF backend!");
// FIXME: Not sure how to verify this (at least in a debug build).
MCSectionCOFF const &Sec =
static_cast<MCSectionCOFF const &>(SectionData.getSection());
@@ -353,15 +371,14 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
COFFSection *coff_section = createSection(Sec.getSectionName());
COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
- coff_section->Symb = coff_symbol;
+ coff_section->Symbol = coff_symbol;
+ coff_symbol->Section = coff_section;
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
- coff_symbol->Data.SectionNumber = coff_section->Number;
// In this case the auxiliary symbol is a Section Definition.
coff_symbol->Aux.resize(1);
memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
coff_symbol->Aux[0].AuxType = ATSectionDefinition;
- coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number;
coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
coff_section->Header.Characteristics = Sec.getCharacteristics();
@@ -388,18 +405,53 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
// Bind internal COFF section to MC section.
coff_section->MCData = &SectionData;
- SectionMap[&SectionData] = coff_section;
+ SectionMap[&SectionData.getSection()] = coff_section;
}
/// This function takes a section data object from the assembler
/// and creates the associated COFF symbol staging object.
void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
- MCAssembler &Assembler) {
- COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName());
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+ if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ if (SymbolData.getSymbol().isVariable()) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+ const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+ // FIXME: This assert message isn't very good.
+ assert(Value->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ const MCSymbolRefExpr *SymbolRef =
+ static_cast<const MCSymbolRefExpr *>(Value);
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+ } else {
+ std::string WeakName = std::string(".weak.")
+ + SymbolData.getSymbol().getName().str()
+ + ".default";
+ COFFSymbol *WeakDefault = createSymbol(WeakName);
+ WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ WeakDefault->Data.Type = 0;
+ WeakDefault->Data.Value = 0;
+ coff_symbol->Other = WeakDefault;
+ }
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
// If no storage class was specified in the streamer, define it here.
if (coff_symbol->Data.StorageClass == 0) {
bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
@@ -408,44 +460,51 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
}
- if (SymbolData.getFlags() & COFF::SF_WeakReference) {
- coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
-
- const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+ if (SymbolData.Fragment != NULL)
+ coff_symbol->Section =
+ SectionMap[&SymbolData.Fragment->getParent()->getSection()];
- // FIXME: This assert message isn't very good.
- assert(Value->getKind() == MCExpr::SymbolRef &&
- "Value must be a SymbolRef!");
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+}
- const MCSymbolRefExpr *SymbolRef =
- static_cast<const MCSymbolRefExpr *>(Value);
+/// Making a section real involves assigning it a number and putting its
+/// name into the string table if needed.
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
- const MCSymbolData &OtherSymbolData =
- Assembler.getSymbolData(SymbolRef->getSymbol());
+ // FIXME: Why is this number 999999? This number is never mentioned in the
+ // spec. I'm assuming this is due to the printed value needing to fit into
+ // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+ // 6)? The spec does not state if this entry should be null terminated in
+ // this case, and thus this seems to be the best way to do it. I think I
+ // just solved my own FIXME...
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
- // FIXME: This assert message isn't very good.
- assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() &&
- "OtherSymbolData must be in the symbol map!");
+ std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+ } else
+ std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
- coff_symbol->Other = SymbolMap[&OtherSymbolData];
+ S.Number = Number;
+ S.Symbol->Data.SectionNumber = S.Number;
+ S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
- // Setup the Weak External auxiliary symbol.
- coff_symbol->Aux.resize(1);
- memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
- coff_symbol->Aux[0].AuxType = ATWeakExternal;
- coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
- coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
- COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
- }
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
- // Bind internal COFF symbol to MC symbol.
- coff_symbol->MCData = &SymbolData;
- SymbolMap[&SymbolData] = coff_symbol;
+ S.set_name_offset(StringTableEntry);
+ } else
+ std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+ S.Index = Index;
}
-bool WinCOFFObjectWriter::ExportSection(COFFSection *S) {
- return (S->Header.Characteristics
- & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+ return !S->MCData->getFragmentList().empty();
}
bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
@@ -455,8 +514,14 @@ bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
// return Asm.isSymbolLinkerVisible (&SymbolData);
- // For now, all symbols are exported, the linker will sort it out for us.
- return true;
+ // For now, all non-variable symbols are exported,
+ // the linker will sort the rest out for us.
+ return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
}
//------------------------------------------------------------------------------
@@ -546,9 +611,10 @@ void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
////////////////////////////////////////////////////////////////////////////////
// MCObjectWriter interface implementations
-void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
// "Define" each section & symbol. This creates section & symbol
- // entries in the staging area and gives them their final indexes.
+ // entries in the staging area.
for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
DefineSection(*i);
@@ -574,19 +640,24 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
MCSectionData const *SectionData = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
- assert(SectionMap.find(SectionData) != SectionMap.end() &&
+ assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
"Section must already have been defined in ExecutePostLayoutBinding!");
- assert(SymbolMap.find(&A_SD) != SymbolMap.end() &&
+ assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
"Symbol must already have been defined in ExecutePostLayoutBinding!");
- COFFSection *coff_section = SectionMap[SectionData];
- COFFSymbol *coff_symbol = SymbolMap[&A_SD];
+ COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
if (Target.getSymB()) {
+ if (&Target.getSymA()->getSymbol().getSection()
+ != &Target.getSymB()->getSymbol().getSection()) {
+ llvm_unreachable("Symbol relative relocations are only allowed between "
+ "symbols in the same section");
+ }
const MCSymbol *B = &Target.getSymB()->getSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
- FixedValue = Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(&B_SD);
+ FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD);
// In the case where we have SymbA and SymB, we just need to store the delta
// between the two symbols. Update FixedValue to account for the delta, and
@@ -600,12 +671,21 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
Reloc.Data.SymbolTableIndex = 0;
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
- Reloc.Symb = coff_symbol;
+
+ // Turn relocations for temporary symbols into section relocations.
+ if (coff_symbol->MCData->getSymbol().isTemporary()) {
+ Reloc.Symb = coff_symbol->Section->Symbol;
+ FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+ + coff_symbol->MCData->getOffset();
+ } else
+ Reloc.Symb = coff_symbol;
+
+ ++Reloc.Symb->Relocations;
Reloc.Data.VirtualAddress += Fixup.getOffset();
- switch (Fixup.getKind()) {
- case X86::reloc_pcrel_4byte:
+ switch ((unsigned)Fixup.getKind()) {
+ case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
@@ -615,6 +695,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
FixedValue += 4;
break;
case FK_Data_4:
+ case X86::reloc_signed_4byte:
Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
: COFF::IMAGE_REL_I386_DIR32;
break;
@@ -631,9 +712,19 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
coff_section->Relocations.push_back(Reloc);
}
-void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
// Assign symbol and section indexes and offsets.
+ Header.NumberOfSections = 0;
+
+ for (sections::iterator i = Sections.begin(),
+ e = Sections.end(); i != e; i++) {
+ if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+ MakeSectionReal(**i, ++Header.NumberOfSections);
+ } else {
+ (*i)->Number = -1;
+ }
+ }
Header.NumberOfSymbols = 0;
@@ -641,32 +732,35 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
COFFSymbol *coff_symbol = *i;
MCSymbolData const *SymbolData = coff_symbol->MCData;
- coff_symbol->Index = Header.NumberOfSymbols++;
-
// Update section number & offset for symbols that have them.
if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
- COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()];
+ assert(coff_symbol->Section != NULL);
- coff_symbol->Data.SectionNumber = coff_section->Number;
+ coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ SymbolData->Offset;
}
- // Update auxiliary symbol info.
- coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
- Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ if (coff_symbol->should_keep()) {
+ MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ } else
+ coff_symbol->Index = -1;
}
// Fixup weak external references.
for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
- COFFSymbol *symb = *i;
-
- if (symb->Other != NULL) {
- assert(symb->Aux.size() == 1 &&
+ COFFSymbol *coff_symbol = *i;
+ if (coff_symbol->Other != NULL) {
+ assert(coff_symbol->Index != -1);
+ assert(coff_symbol->Aux.size() == 1 &&
"Symbol must contain one aux symbol!");
- assert(symb->Aux[0].AuxType == ATWeakExternal &&
+ assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
"Symbol's aux symbol must be a Weak External!");
- symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
}
}
@@ -675,18 +769,19 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
unsigned offset = 0;
offset += COFF::HeaderSize;
- offset += COFF::SectionSize * Asm.size();
-
- Header.NumberOfSections = Sections.size();
+ offset += COFF::SectionSize * Header.NumberOfSections;
for (MCAssembler::const_iterator i = Asm.begin(),
e = Asm.end();
i != e; i++) {
- COFFSection *Sec = SectionMap[i];
+ COFFSection *Sec = SectionMap[&i->getSection()];
- Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i);
+ if (Sec->Number == -1)
+ continue;
- if (ExportSection(Sec)) {
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+ if (IsPhysicalSection(Sec)) {
Sec->Header.PointerToRawData = offset;
offset += Sec->Header.SizeOfRawData;
@@ -700,13 +795,15 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
for (relocations::iterator cr = Sec->Relocations.begin(),
er = Sec->Relocations.end();
- cr != er; cr++) {
+ cr != er; ++cr) {
+ assert((*cr).Symb->Index != -1);
(*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
}
}
- assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!");
- AuxSymbol &Aux = Sec->Symb->Aux[0];
+ assert(Sec->Symbol->Aux.size() == 1
+ && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symbol->Aux[0];
assert(Aux.AuxType == ATSectionDefinition &&
"Section's symbol's aux symbol must be a Section Definition!");
Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
@@ -728,16 +825,21 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
MCAssembler::const_iterator j, je;
for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
- WriteSectionHeader((*i)->Header);
+ if ((*i)->Number != -1)
+ WriteSectionHeader((*i)->Header);
for (i = Sections.begin(), ie = Sections.end(),
j = Asm.begin(), je = Asm.end();
- (i != ie) && (j != je); i++, j++) {
+ (i != ie) && (j != je); ++i, ++j) {
+
+ if ((*i)->Number == -1)
+ continue;
+
if ((*i)->Header.PointerToRawData != 0) {
assert(OS.tell() == (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.WriteSectionData(j, Layout, this);
+ Asm.WriteSectionData(j, Layout);
}
if ((*i)->Relocations.size() > 0) {
@@ -759,7 +861,8 @@ void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
"Header::PointerToSymbolTable is insane!");
for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
- WriteSymbol(*i);
+ if ((*i)->Index != -1)
+ WriteSymbol(*i);
OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
index 8a194bf..46968e6 100644
--- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -48,8 +48,10 @@ public:
// MCStreamer interface
+ virtual void InitSections();
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
@@ -66,18 +68,55 @@ public:
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace);
- virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize, unsigned MaxBytesToEmit);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit);
- virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
virtual void EmitFileDirective(StringRef Filename);
- virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename);
virtual void EmitInstruction(const MCInst &Instruction);
virtual void Finish();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+ virtual void EmitInstToData(const MCInst &Inst) {
+ llvm_unreachable("Not used by WinCOFF.");
+ }
+
+ void SetSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind) {
+ SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+ }
+
+ void SetSectionText() {
+ SetSection(".text",
+ COFF::IMAGE_SCN_CNT_CODE
+ | COFF::IMAGE_SCN_MEM_EXECUTE
+ | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionData() {
+ SetSection(".data",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+
+ void SetSectionBSS() {
+ SetSection(".bss",
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+
};
} // end anonymous namespace.
@@ -126,47 +165,81 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// MCStreamer interface
+void WinCOFFStreamer::InitSections() {
+ SetSectionText();
+ SetSectionData();
+ SetSectionBSS();
+ SetSectionText();
+}
+
void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
- // TODO: This is copied almost exactly from the MachOStreamer. Consider
- // merging into MCObjectStreamer?
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(CurSection && "Cannot emit before setting section!");
-
- Symbol->setSection(*CurSection);
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-
- // FIXME: This is wasteful, we don't necessarily need to create a data
- // fragment. Instead, we should mark the symbol as pointing into the data
- // fragment if it exists, otherwise we should just queue the label and set its
- // fragment pointer when we emit the next fragment.
- MCDataFragment *DF = getOrCreateDataFragment();
-
- assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
- SD.setFragment(DF);
- SD.setOffset(DF->getContents().size());
+ MCObjectStreamer::EmitLabel(Symbol);
}
void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("not implemented");
}
+void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("not implemented");
+}
+
void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // TODO: This is exactly the same as MachOStreamer. Consider merging into
- // MCObjectStreamer.
- getAssembler().getOrCreateSymbolData(*Symbol);
- AddValueSymbols(Value);
- Symbol->setVariableValue(Value);
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
+ // FIXME: This is all very ugly and depressing. What needs to happen here
+ // depends on quite a few things that are all part of relaxation, which we
+ // don't really even do.
+
+ if (Value->getKind() != MCExpr::SymbolRef) {
+ // TODO: This is exactly the same as MachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ AddValueSymbols(Value);
+ Symbol->setVariableValue(Value);
+ } else {
+ // FIXME: This is a horrible way to do this :(. This should really be
+ // handled after we are done with the MC* objects and immediately before
+ // writing out the object file when we know exactly what the symbol should
+ // look like in the coff symbol table. I'm not doing that now because the
+ // COFF object writer doesn't have a clearly defined separation between MC
+ // data structures, the object writers data structures, and the raw, POD,
+ // data structures that get written to disk.
+
+ // Copy over the aliased data.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
+ dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
+
+ // FIXME: This is particularly nasty because it breaks as soon as any data
+ // members of MCSymbolData change.
+ SD.CommonAlign = RealSD.CommonAlign;
+ SD.CommonSize = RealSD.CommonSize;
+ SD.Flags = RealSD.Flags;
+ SD.Fragment = RealSD.Fragment;
+ SD.Index = RealSD.Index;
+ SD.IsExternal = RealSD.IsExternal;
+ SD.IsPrivateExtern = RealSD.IsPrivateExtern;
+ SD.Offset = RealSD.Offset;
+ SD.SymbolSize = RealSD.SymbolSize;
+ }
}
void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
+ assert(Symbol && "Symbol must be non-null!");
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
switch (Attribute) {
case MCSA_WeakReference:
- getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags(
- COFF::SF_WeakReference,
- COFF::SF_WeakReference);
+ case MCSA_Weak: {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+ SD.setExternal(true);
+ }
break;
case MCSA_Global:
@@ -184,6 +257,9 @@ void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
}
void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
"to BeginCOFFSymbolDef!");
CurSymbol = Symbol;
@@ -220,10 +296,16 @@ void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
AddCommonSymbol(Symbol, Size, ByteAlignment, true);
}
void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert((Symbol->isInSection()
+ ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+ : true) && "Got non COFF section in the COFF backend!");
AddCommonSymbol(Symbol, Size, 1, false);
}
@@ -243,32 +325,6 @@ void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
-void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
- assert(AddrSpace == 0 && "Address space must be 0!");
-
- // TODO: This is copied exactly from the MachOStreamer. Consider merging into
- // MCObjectStreamer?
- MCDataFragment *DF = getOrCreateDataFragment();
-
- // Avoid fixups when possible.
- int64_t AbsValue;
- if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
- // FIXME: Endianness assumption.
- for (unsigned i = 0; i != Size; ++i)
- DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
- } else {
- DF->addFixup(MCFixup::Create(DF->getContents().size(),
- AddValueSymbols(Value),
- MCFixup::getKindForSize(Size)));
- DF->getContents().resize(DF->getContents().size() + Size, 0);
- }
-}
-
-void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) {
- llvm_unreachable("not implemented");
-}
-
void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value,
unsigned ValueSize,
@@ -300,21 +356,11 @@ void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
-void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
- llvm_unreachable("not implemented");
-}
-
void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
// Ignore for now, linkers don't care, and proper debug
  // info will be a much larger effort.
}
-void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Filename) {
- llvm_unreachable("not implemented");
-}
-
void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
if (Instruction.getOperand(i).isExpr())
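
InitSections above pre-creates .text, .data and .bss with explicit COFF characteristics and then switches back to .text. A standalone sketch, not part of the patch, of what those flag combinations amount to; the numeric constants below are the standard PE/COFF values from winnt.h, not values taken from this change:

    #include <cstdio>

    // Standard PE/COFF section characteristic bits (assumed winnt.h values,
    // mirroring the COFF::IMAGE_SCN_* names used by the streamer above).
    static const unsigned IMAGE_SCN_CNT_CODE               = 0x00000020u;
    static const unsigned IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040u;
    static const unsigned IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080u;
    static const unsigned IMAGE_SCN_MEM_EXECUTE            = 0x20000000u;
    static const unsigned IMAGE_SCN_MEM_READ               = 0x40000000u;
    static const unsigned IMAGE_SCN_MEM_WRITE              = 0x80000000u;

    int main() {
      unsigned Text = IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_EXECUTE |
                      IMAGE_SCN_MEM_READ;
      unsigned Data = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ |
                      IMAGE_SCN_MEM_WRITE;
      unsigned Bss  = IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
                      IMAGE_SCN_MEM_WRITE;
      std::printf(".text 0x%08x  .data 0x%08x  .bss 0x%08x\n", Text, Data, Bss);
      return 0;
    }
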
diff --git a/contrib/llvm/lib/Object/CMakeLists.txt b/contrib/llvm/lib/Object/CMakeLists.txt
new file mode 100644
index 0000000..6a6814f
--- /dev/null
+++ b/contrib/llvm/lib/Object/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMObject
+ MachOObject.cpp
+ ObjectFile.cpp
+ COFFObjectFile.cpp
+ ELFObjectFile.cpp
+ )
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 0000000..cfee82a
--- /dev/null
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,375 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+struct coff_file_header {
+ ulittle16_t Machine;
+ ulittle16_t NumberOfSections;
+ ulittle32_t TimeDateStamp;
+ ulittle32_t PointerToSymbolTable;
+ ulittle32_t NumberOfSymbols;
+ ulittle16_t SizeOfOptionalHeader;
+ ulittle16_t Characteristics;
+};
+}
+
+extern char coff_file_header_layout_static_assert
+ [sizeof(coff_file_header) == 20 ? 1 : -1];
+
+namespace {
+struct coff_symbol {
+ struct StringTableOffset {
+ ulittle32_t Zeroes;
+ ulittle32_t Offset;
+ };
+
+ union {
+ char ShortName[8];
+ StringTableOffset Offset;
+ } Name;
+
+ ulittle32_t Value;
+ little16_t SectionNumber;
+
+ struct {
+ ulittle8_t BaseType;
+ ulittle8_t ComplexType;
+ } Type;
+
+ ulittle8_t StorageClass;
+ ulittle8_t NumberOfAuxSymbols;
+};
+}
+
+extern char coff_coff_symbol_layout_static_assert
+ [sizeof(coff_symbol) == 18 ? 1 : -1];
+
+namespace {
+struct coff_section {
+ char Name[8];
+ ulittle32_t VirtualSize;
+ ulittle32_t VirtualAddress;
+ ulittle32_t SizeOfRawData;
+ ulittle32_t PointerToRawData;
+ ulittle32_t PointerToRelocations;
+ ulittle32_t PointerToLinenumbers;
+ ulittle16_t NumberOfRelocations;
+ ulittle16_t NumberOfLinenumbers;
+ ulittle32_t Characteristics;
+};
+}
+
+extern char coff_coff_section_layout_static_assert
+ [sizeof(coff_section) == 40 ? 1 : -1];
+
+namespace {
+class COFFObjectFile : public ObjectFile {
+private:
+ const coff_file_header *Header;
+ const coff_section *SectionTable;
+ const coff_symbol *SymbolTable;
+ const char *StringTable;
+
+ const coff_section *getSection(std::size_t index) const;
+ const char *getString(std::size_t offset) const;
+
+protected:
+ virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
+ virtual StringRef getSymbolName(DataRefImpl Symb) const;
+ virtual uint64_t getSymbolAddress(DataRefImpl Symb) const;
+ virtual uint64_t getSymbolSize(DataRefImpl Symb) const;
+ virtual char getSymbolNMTypeChar(DataRefImpl Symb) const;
+ virtual bool isSymbolInternal(DataRefImpl Symb) const;
+
+ virtual SectionRef getSectionNext(DataRefImpl Sec) const;
+ virtual StringRef getSectionName(DataRefImpl Sec) const;
+ virtual uint64_t getSectionAddress(DataRefImpl Sec) const;
+ virtual uint64_t getSectionSize(DataRefImpl Sec) const;
+ virtual StringRef getSectionContents(DataRefImpl Sec) const;
+ virtual bool isSectionText(DataRefImpl Sec) const;
+
+public:
+ COFFObjectFile(MemoryBuffer *Object);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+};
+} // end namespace
+
+SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const {
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ symb += 1 + symb->NumberOfAuxSymbols;
+ Symb.p = reinterpret_cast<intptr_t>(symb);
+ return SymbolRef(Symb, this);
+}
+
+StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ // Check for string table entry. First 4 bytes are 0.
+ if (symb->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symb->Name.Offset.Offset;
+ return StringRef(getString(Offset));
+ }
+
+ if (symb->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ return StringRef(symb->Name.ShortName);
+ // Not null terminated, use all 8 bytes.
+ return StringRef(symb->Name.ShortName, 8);
+}
+
+uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ const coff_section *Section = getSection(symb->SectionNumber);
+ char Type = getSymbolNMTypeChar(Symb);
+ if (Type == 'U' || Type == 'w')
+ return UnknownAddressOrSize;
+ if (Section)
+ return Section->VirtualAddress + symb->Value;
+ return symb->Value;
+}
+
+uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+ // FIXME: Return the correct size. This requires looking at all the symbols
+ // in the same section as this symbol, and looking for either the next
+ // symbol, or the end of the section.
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ const coff_section *Section = getSection(symb->SectionNumber);
+ char Type = getSymbolNMTypeChar(Symb);
+ if (Type == 'U' || Type == 'w')
+ return UnknownAddressOrSize;
+ if (Section)
+ return Section->SizeOfRawData - symb->Value;
+ return 0;
+}
+
+char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+ char ret = StringSwitch<char>(getSymbolName(Symb))
+ .StartsWith(".debug", 'N')
+ .StartsWith(".sxdata", 'N')
+ .Default('?');
+
+ if (ret != '?')
+ return ret;
+
+ uint32_t Characteristics = 0;
+ uint32_t PointerToRawData = 0;
+ const coff_section *Section = getSection(symb->SectionNumber);
+ if (Section) {
+ Characteristics = Section->Characteristics;
+ PointerToRawData = Section->PointerToRawData;
+ }
+
+ switch (symb->SectionNumber) {
+ case COFF::IMAGE_SYM_UNDEFINED:
+ // Check storage classes.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+ return 'w'; // Don't do ::toupper.
+ else
+ ret = 'u';
+ break;
+ case COFF::IMAGE_SYM_ABSOLUTE:
+ ret = 'a';
+ break;
+ case COFF::IMAGE_SYM_DEBUG:
+ ret = 'n';
+ break;
+ default:
+ // Check section type.
+ if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
+ ret = 't';
+ else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ
+ && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only.
+ ret = 'r';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ ret = 'd';
+ else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ ret = 'b';
+ else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
+ ret = 'i';
+
+ // Check for section symbol.
+ else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+ && symb->Value == 0)
+ ret = 's';
+ }
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ ret = ::toupper(ret);
+
+ return ret;
+}
+
+bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const {
+ return false;
+}
+
+SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ sec += 1;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ return SectionRef(Sec, this);
+}
+
+StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ StringRef name;
+ if (sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ name = sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ name = StringRef(sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (name[0] == '/') {
+ uint32_t Offset;
+ name.substr(1).getAsInteger(10, Offset); // skip the leading '/'
+ return StringRef(getString(Offset));
+ }
+
+ // It's just a normal name.
+ return name;
+}
+
+uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ return sec->VirtualAddress;
+}
+
+uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ return sec->SizeOfRawData;
+}
+
+StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData),
+ sec->SizeOfRawData);
+}
+
+bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
+ const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+ return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+}
+
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
+ : ObjectFile(Object) {
+ Header = reinterpret_cast<const coff_file_header *>(base);
+ SectionTable =
+ reinterpret_cast<const coff_section *>( base
+ + sizeof(coff_file_header)
+ + Header->SizeOfOptionalHeader);
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable);
+
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base)
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * 18;
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SymbolTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
+ // The symbol table ends where the string table begins.
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(StringTable);
+ return symbol_iterator(SymbolRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SectionTable);
+ return section_iterator(SectionRef(ret, this));
+}
+
+ObjectFile::section_iterator COFFObjectFile::end_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+ return section_iterator(SectionRef(ret, this));
+}
+
+uint8_t COFFObjectFile::getBytesInAddress() const {
+ return getArch() == Triple::x86_64 ? 8 : 4;
+}
+
+StringRef COFFObjectFile::getFileFormatName() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "COFF-i386";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "COFF-x86-64";
+ default:
+ return "COFF-<unknown arch>";
+ }
+}
+
+unsigned COFFObjectFile::getArch() const {
+ switch(Header->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return Triple::x86;
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+const coff_section *COFFObjectFile::getSection(std::size_t index) const {
+ if (index > 0 && index <= Header->NumberOfSections)
+ return SectionTable + (index - 1);
+ return 0;
+}
+
+const char *COFFObjectFile::getString(std::size_t offset) const {
+ const ulittle32_t *StringTableSize =
+ reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (offset < *StringTableSize)
+ return StringTable + offset;
+ return 0;
+}
+
+namespace llvm {
+
+ ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
+ return new COFFObjectFile(Object);
+ }
+
+} // end namespace llvm
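The COFF reader above exposes its symbol table through the generic ObjectFile iterators. A minimal nm-style walk might look like the sketch below; this is illustrative only and assumes the public SymbolRef wrapper forwards getName()/getNMTypeChar() to the protected hooks defined in this file, and that symbol_iterator supports pre-increment and member access (none of which is spelled out in this diff).

    // Hypothetical usage sketch, not part of the commit.
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace object;

    static void dumpCOFFSymbols(MemoryBuffer *Buf) {
      ObjectFile *Obj = ObjectFile::createCOFFObjectFile(Buf); // takes ownership of Buf
      for (ObjectFile::symbol_iterator I = Obj->begin_symbols(),
                                       E = Obj->end_symbols(); I != E; ++I)
        outs() << I->getNMTypeChar() << ' ' << I->getName() << '\n'; // assumed accessors
      delete Obj;
    }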
diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp
new file mode 100644
index 0000000..682be77
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -0,0 +1,686 @@
+//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ELFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <limits>
+#include <utility>
+
+using namespace llvm;
+using namespace object;
+
+// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
+namespace {
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelperCommon {
+ typedef support::detail::packed_endian_specific_integral
+ <uint16_t, target_endianness, support::aligned> Elf_Half;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Word;
+ typedef support::detail::packed_endian_specific_integral
+ <int32_t, target_endianness, support::aligned> Elf_Sword;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Xword;
+ typedef support::detail::packed_endian_specific_integral
+ <int64_t, target_endianness, support::aligned> Elf_Sxword;
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct ELFDataTypeTypedefHelper;
+
+/// ELF 32bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, false>
+ : ELFDataTypeTypedefHelperCommon<target_endianness> {
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Off;
+};
+
+/// ELF 64bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, true>
+ : ELFDataTypeTypedefHelperCommon<target_endianness>{
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Off;
+};
+}
+
+// I really don't like doing this, but the alternative is copypasta.
+#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+
+ // Section header.
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Word sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Word sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Word sh_addralign;// Section address alignment
+ Elf_Word sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Xword sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Xword sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Xword sh_addralign;// Section address alignment
+ Elf_Xword sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+
+ /// @brief Get the number of entities this section contains if it has any.
+ unsigned getEntityCount() const {
+ if (sh_entsize == 0)
+ return 0;
+ return sh_size / sh_entsize;
+ }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word st_name; // Symbol name (index into string table)
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Word st_size; // Size of the symbol
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+};
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word st_name; // Symbol name (index into string table)
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Xword st_size; // Size of the symbol
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
+ using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+
+ // These accessors and mutators correspond to the ELF32_ST_BIND,
+ // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+ unsigned char getBinding() const { return st_info >> 4; }
+ unsigned char getType() const { return st_info & 0x0f; }
+ void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+ void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+ void setBindingAndType(unsigned char b, unsigned char t) {
+ st_info = (b << 4) + (t & 0x0f);
+ }
+};
+}
+
+namespace {
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile : public ObjectFile {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+
+ typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
+ typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+
+ struct Elf_Ehdr {
+ unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
+ Elf_Half e_type; // Type of file (see ET_*)
+ Elf_Half e_machine; // Required architecture for this file (see EM_*)
+ Elf_Word e_version; // Must be equal to 1
+ Elf_Addr e_entry; // Address to jump to in order to start program
+ Elf_Off e_phoff; // Program header table's file offset, in bytes
+ Elf_Off e_shoff; // Section header table's file offset, in bytes
+ Elf_Word e_flags; // Processor-specific flags
+ Elf_Half e_ehsize; // Size of ELF header, in bytes
+ Elf_Half e_phentsize;// Size of an entry in the program header table
+ Elf_Half e_phnum; // Number of entries in the program header table
+ Elf_Half e_shentsize;// Size of an entry in the section header table
+ Elf_Half e_shnum; // Number of entries in the section header table
+ Elf_Half e_shstrndx; // Section header table index of section name
+ // string table
+ bool checkMagic() const {
+ return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+ }
+ unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
+ unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+ };
+
+ typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t;
+
+ const Elf_Ehdr *Header;
+ const Elf_Shdr *SectionHeaderTable;
+ const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+ const Elf_Shdr *dot_strtab_sec; // Symbol string table (.strtab).
+ SymbolTableSections_t SymbolTableSections;
+
+ void validateSymbol(DataRefImpl Symb) const;
+ const Elf_Sym *getSymbol(DataRefImpl Symb) const;
+ const Elf_Shdr *getSection(DataRefImpl index) const;
+ const Elf_Shdr *getSection(uint16_t index) const;
+ const char *getString(uint16_t section, uint32_t offset) const;
+ const char *getString(const Elf_Shdr *section, uint32_t offset) const;
+
+protected:
+ virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
+ virtual StringRef getSymbolName(DataRefImpl Symb) const;
+ virtual uint64_t getSymbolAddress(DataRefImpl Symb) const;
+ virtual uint64_t getSymbolSize(DataRefImpl Symb) const;
+ virtual char getSymbolNMTypeChar(DataRefImpl Symb) const;
+ virtual bool isSymbolInternal(DataRefImpl Symb) const;
+
+ virtual SectionRef getSectionNext(DataRefImpl Sec) const;
+ virtual StringRef getSectionName(DataRefImpl Sec) const;
+ virtual uint64_t getSectionAddress(DataRefImpl Sec) const;
+ virtual uint64_t getSectionSize(DataRefImpl Sec) const;
+ virtual StringRef getSectionContents(DataRefImpl Sec) const;
+ virtual bool isSectionText(DataRefImpl Sec) const;
+
+public:
+ ELFObjectFile(MemoryBuffer *Object);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+};
+} // end namespace
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+ ::validateSymbol(DataRefImpl Symb) const {
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+ // FIXME: We really need to do proper error handling in the case of an invalid
+ // input file. Because we don't use exceptions, I think we'll just pass
+ // an error object around.
+ if (!( symb
+ && SymbolTableSection
+ && symb >= (const Elf_Sym*)(base
+ + SymbolTableSection->sh_offset)
+ && symb < (const Elf_Sym*)(base
+ + SymbolTableSection->sh_offset
+ + SymbolTableSection->sh_size)))
+ // FIXME: Proper error handling.
+ report_fatal_error("Symb must point to a valid symbol!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+SymbolRef ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNext(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+
+ ++Symb.d.a;
+ // Check to see if we are at the end of this symbol table.
+ if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
+ // We are at the end. If there are other symbol tables, jump to them.
+ ++Symb.d.b;
+ Symb.d.a = 1; // The 0th symbol in ELF is fake.
+ // Otherwise return the terminator.
+ if (Symb.d.b >= SymbolTableSections.size()) {
+ Symb.d.a = std::numeric_limits<uint32_t>::max();
+ Symb.d.b = std::numeric_limits<uint32_t>::max();
+ }
+ }
+
+ return SymbolRef(Symb, this);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_name == 0) {
+ const Elf_Shdr *section = getSection(symb->st_shndx);
+ if (!section)
+ return "";
+ return getString(dot_shstrtab_sec, section->sh_name);
+ }
+
+ // Use the default symbol table name section.
+ return getString(dot_strtab_sec, symb->st_name);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (symb->st_shndx) {
+ case ELF::SHN_COMMON:
+ // Undefined symbols have no address yet.
+ case ELF::SHN_UNDEF: return UnknownAddressOrSize;
+ case ELF::SHN_ABS: return symb->st_value;
+ default: Section = getSection(symb->st_shndx);
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_SECTION: return Section ? Section->sh_addr
+ : UnknownAddressOrSize;
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
+ case ELF::STT_NOTYPE:
+ return symb->st_value;
+ default: return UnknownAddressOrSize;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ if (symb->st_size == 0)
+ return UnknownAddressOrSize;
+ return symb->st_size;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+char ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section = getSection(symb->st_shndx);
+
+ char ret = '?';
+
+ if (Section) {
+ switch (Section->sh_type) {
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_DYNAMIC:
+ switch (Section->sh_flags) {
+ case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
+ ret = 't'; break;
+ case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
+ ret = 'd'; break;
+ case ELF::SHF_ALLOC:
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
+ ret = 'r'; break;
+ }
+ break;
+ case ELF::SHT_NOBITS: ret = 'b';
+ }
+ }
+
+ switch (symb->st_shndx) {
+ case ELF::SHN_UNDEF:
+ if (ret == '?')
+ ret = 'U';
+ break;
+ case ELF::SHN_ABS: ret = 'a'; break;
+ case ELF::SHN_COMMON: ret = 'c'; break;
+ }
+
+ switch (symb->getBinding()) {
+ case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
+ case ELF::STB_WEAK:
+ if (symb->st_shndx == ELF::SHN_UNDEF)
+ ret = 'w';
+ else
+ if (symb->getType() == ELF::STT_OBJECT)
+ ret = 'V';
+ else
+ ret = 'W';
+ }
+
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION)
+ return StringSwitch<char>(getSymbolName(Symb))
+ .StartsWith(".debug", 'N')
+ .StartsWith(".note", 'n');
+
+ return ret;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+bool ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolInternal(DataRefImpl Symb) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ if ( symb->getType() == ELF::STT_FILE
+ || symb->getType() == ELF::STT_SECTION)
+ return true;
+ return false;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+SectionRef ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec) const {
+ const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
+ sec += Header->e_shentsize;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ return SectionRef(Sec, this);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ return StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAddress(DataRefImpl Sec) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ return sec->sh_addr;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ return sec->sh_size;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ const char *start = (char*)base + sec->sh_offset;
+ return StringRef(start, sec->sh_size);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+bool ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & ELF::SHF_EXECINSTR)
+ return true;
+ return false;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
+ : ObjectFile(Object)
+ , SectionHeaderTable(0)
+ , dot_shstrtab_sec(0)
+ , dot_strtab_sec(0) {
+ Header = reinterpret_cast<const Elf_Ehdr *>(base);
+
+ if (Header->e_shoff == 0)
+ return;
+
+ SectionHeaderTable =
+ reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff);
+ uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
+ if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
+ <= base + MapFile->getBufferSize()))
+ // FIXME: Proper error handling.
+ report_fatal_error("Section table goes past end of file!");
+
+
+ // To find the symbol tables we walk the section table to find SHT_SYMTAB.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_SYMTAB) {
+ SymbolTableSections.push_back(sh);
+ }
+ }
+
+ // Get string table sections.
+ dot_shstrtab_sec = getSection(Header->e_shstrndx);
+ if (dot_shstrtab_sec) {
+ // Verify that the last byte in the string table is a null terminator.
+ if (((const char*)base + dot_shstrtab_sec->sh_offset)
+ [dot_shstrtab_sec->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+
+ // Merge this into the above loop.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + Header->e_shnum * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_STRTAB) {
+ StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
+ if (SectionName == ".strtab") {
+ if (dot_strtab_sec != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("Already found section named .strtab!");
+ dot_strtab_sec = sh;
+ const char *dot_strtab = (const char*)base + sh->sh_offset;
+ if (dot_strtab[sh->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+ }
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ if (SymbolTableSections.size() == 0) {
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ } else {
+ SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+ SymbolData.d.b = 0;
+ }
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_symbols() const {
+ DataRefImpl SymbolData;
+ memset(&SymbolData, 0, sizeof(SymbolData));
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff);
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(base
+ + Header->e_shoff
+ + (Header->e_shentsize * Header->e_shnum));
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
+ return is64Bits ? 8 : 4;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getFileFormatName() const {
+ switch(Header->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF32-i386";
+ case ELF::EM_X86_64:
+ return "ELF32-x86-64";
+ default:
+ return "ELF32-unknown";
+ }
+ case ELF::ELFCLASS64:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF64-i386";
+ case ELF::EM_X86_64:
+ return "ELF64-x86-64";
+ default:
+ return "ELF64-unknown";
+ }
+ default:
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid ELFCLASS!");
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return Triple::x86;
+ case ELF::EM_X86_64:
+ return Triple::x86_64;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
+ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
+ return reinterpret_cast<const Elf_Sym *>(
+ base
+ + sec->sh_offset
+ + (Symb.d.a * sec->sh_entsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = getSection(Symb.d.b);
+ if (sec->sh_type != ELF::SHT_SYMTAB)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid symbol table section!");
+ return sec;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const {
+ if (index == 0 || index >= ELF::SHN_LORESERVE)
+ return 0;
+ if (!SectionHeaderTable || index >= Header->e_shnum)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid section index!");
+
+ return reinterpret_cast<const Elf_Shdr *>(
+ reinterpret_cast<const char *>(SectionHeaderTable)
+ + (index * Header->e_shentsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(uint16_t section,
+ ELF::Elf32_Word offset) const {
+ return getString(getSection(section), offset);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(const Elf_Shdr *section,
+ ELF::Elf32_Word offset) const {
+ assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
+ if (offset >= section->sh_size)
+ // FIXME: Proper error handling.
+ report_fatal_error("Sybol name offset outside of string table!");
+ return (const char *)base + section->sh_offset + offset;
+}
+
+// EI_CLASS, EI_DATA.
+static std::pair<unsigned char, unsigned char>
+getElfArchType(MemoryBuffer *Object) {
+ if (Object->getBufferSize() < ELF::EI_NIDENT)
+ return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+ return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+ , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
+}
+
+namespace llvm {
+
+ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, false>(Object);
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, false>(Object);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, true>(Object);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, true>(Object);
+ // FIXME: Proper error handling.
+ report_fatal_error("Not an ELF object file!");
+ }
+
+} // end namespace llvm
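Sections travel through the same interface. The sketch below counts executable sections, relying on the isSectionText() hook implemented above; it assumes a SectionRef::isText() accessor that forwards to it, which is not shown in this diff.

    // Hypothetical usage sketch, not part of the commit.
    // (Same includes and using-directives as the COFF sketch earlier.)
    static unsigned countTextSections(ObjectFile *Obj) {
      unsigned N = 0;
      for (ObjectFile::section_iterator I = Obj->begin_sections(),
                                        E = Obj->end_sections(); I != E; ++I)
        if (I->isText()) // assumed SectionRef accessor
          ++N;
      return N;
    }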
diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp
new file mode 100644
index 0000000..5e64d63
--- /dev/null
+++ b/contrib/llvm/lib/Object/MachOObject.cpp
@@ -0,0 +1,342 @@
+//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+/* Translation Utilities */
+
+template<typename T>
+static void SwapValue(T &Value) {
+ Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<typename T>
+static void ReadInMemoryStruct(const MachOObject &MOO,
+ StringRef Buffer, uint64_t Base,
+ InMemoryStruct<T> &Res) {
+ typedef T struct_type;
+ uint64_t Size = sizeof(struct_type);
+
+ // Check that the buffer contains the expected data.
+ if (Base + Size > Buffer.size()) {
+ Res = 0;
+ return;
+ }
+
+ // Check whether we can return a direct pointer.
+ struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
+ if (!MOO.isSwappedEndian()) {
+ Res = Ptr;
+ return;
+ }
+
+ // Otherwise, copy the struct and translate the values.
+ Res = *Ptr;
+ SwapStruct(*Res);
+}
+
+/* *** */
+
+MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
+ bool Is64Bit_)
+ : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
+ IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
+ HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
+ // Load the common header.
+ memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
+ if (IsSwappedEndian) {
+ SwapValue(Header.Magic);
+ SwapValue(Header.CPUType);
+ SwapValue(Header.CPUSubtype);
+ SwapValue(Header.FileType);
+ SwapValue(Header.NumLoadCommands);
+ SwapValue(Header.SizeOfLoadCommands);
+ SwapValue(Header.Flags);
+ }
+
+ if (is64Bit()) {
+ memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
+ sizeof(Header64Ext));
+ if (IsSwappedEndian) {
+ SwapValue(Header64Ext.Reserved);
+ }
+ }
+
+ // Create the load command array if sane.
+ if (getHeader().NumLoadCommands < (1 << 20))
+ LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
+}
+
+MachOObject::~MachOObject() {
+ delete [] LoadCommands;
+}
+
+MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
+ std::string *ErrorStr) {
+ // First, check the magic value and initialize the basic object info.
+ bool IsLittleEndian = false, Is64Bit = false;
+ StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ if (Magic == "\xFE\xED\xFA\xCE") {
+ } else if (Magic == "\xCE\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ } else if (Magic == "\xFE\xED\xFA\xCF") {
+ Is64Bit = true;
+ } else if (Magic == "\xCF\xFA\xED\xFE") {
+ IsLittleEndian = true;
+ Is64Bit = true;
+ } else {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
+ return 0;
+ }
+
+ // Ensure that at least the full header is present.
+ unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
+ if (Buffer->getBufferSize() < HeaderSize) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
+ return 0;
+ }
+
+ OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
+ Is64Bit));
+
+ // Check for bogus number of load commands.
+ if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
+ if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
+ return 0;
+ }
+
+ if (ErrorStr) *ErrorStr = "";
+ return Object.take();
+}
+
+StringRef MachOObject::getData(size_t Offset, size_t Size) const {
+ return Buffer->getBuffer().substr(Offset,Size);
+}
+
+void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
+ HasStringTable = true;
+ StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
+ SLC.StringTableSize);
+}
+
+const MachOObject::LoadCommandInfo &
+MachOObject::getLoadCommandInfo(unsigned Index) const {
+ assert(Index < getHeader().NumLoadCommands && "Invalid index!");
+
+ // Load the command, if necessary.
+ if (Index >= NumLoadedCommands) {
+ uint64_t Offset;
+ if (Index == 0) {
+ Offset = getHeaderSize();
+ } else {
+ const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
+ Offset = Prev.Offset + Prev.Command.Size;
+ }
+
+ LoadCommandInfo &Info = LoadCommands[Index];
+ memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
+ sizeof(macho::LoadCommand));
+ if (IsSwappedEndian) {
+ SwapValue(Info.Command.Type);
+ SwapValue(Info.Command.Size);
+ }
+ Info.Offset = Offset;
+ NumLoadedCommands = Index + 1;
+ }
+
+ return LoadCommands[Index];
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.VMAddress);
+ SwapValue(Value.VMSize);
+ SwapValue(Value.FileOffset);
+ SwapValue(Value.FileSize);
+ SwapValue(Value.MaxVMProtection);
+ SwapValue(Value.InitialVMProtection);
+ SwapValue(Value.NumSections);
+ SwapValue(Value.Flags);
+}
+void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.SymbolTableOffset);
+ SwapValue(Value.NumSymbolTableEntries);
+ SwapValue(Value.StringTableOffset);
+ SwapValue(Value.StringTableSize);
+}
+void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &Value) {
+ SwapValue(Value.Type);
+ SwapValue(Value.Size);
+ SwapValue(Value.LocalSymbolsIndex);
+ SwapValue(Value.NumLocalSymbols);
+ SwapValue(Value.ExternalSymbolsIndex);
+ SwapValue(Value.NumExternalSymbols);
+ SwapValue(Value.UndefinedSymbolsIndex);
+ SwapValue(Value.NumUndefinedSymbols);
+ SwapValue(Value.TOCOffset);
+ SwapValue(Value.NumTOCEntries);
+ SwapValue(Value.ModuleTableOffset);
+ SwapValue(Value.NumModuleTableEntries);
+ SwapValue(Value.ReferenceSymbolTableOffset);
+ SwapValue(Value.NumReferencedSymbolTableEntries);
+ SwapValue(Value.IndirectSymbolTableOffset);
+ SwapValue(Value.NumIndirectSymbolTableEntries);
+ SwapValue(Value.ExternalRelocationTableOffset);
+ SwapValue(Value.NumExternalRelocationTableEntries);
+ SwapValue(Value.LocalRelocationTableOffset);
+ SwapValue(Value.NumLocalRelocationTableEntries);
+}
+void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
+ InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
+ SwapValue(Value.Index);
+}
+void
+MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+ unsigned Index,
+ InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
+ uint64_t Offset = (DLC.IndirectSymbolTableOffset +
+ Index * sizeof(macho::IndirectSymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+
+template<>
+void SwapStruct(macho::Section &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+}
+void MachOObject::ReadSection(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
+ Index * sizeof(macho::Section));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Section64 &Value) {
+ SwapValue(Value.Address);
+ SwapValue(Value.Size);
+ SwapValue(Value.Offset);
+ SwapValue(Value.Align);
+ SwapValue(Value.RelocationTableOffset);
+ SwapValue(Value.NumRelocationTableEntries);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Reserved1);
+ SwapValue(Value.Reserved2);
+ SwapValue(Value.Reserved3);
+}
+void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
+ unsigned Index,
+ InMemoryStruct<macho::Section64> &Res) const {
+ assert(LCI.Command.Type == macho::LCT_Segment64 &&
+ "Unexpected load command info!");
+ uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
+ Index * sizeof(macho::Section64));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::RelocationEntry &Value) {
+ SwapValue(Value.Word0);
+ SwapValue(Value.Word1);
+}
+void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::RelocationEntry> &Res) const {
+ uint64_t Offset = (RelocationTableOffset +
+ Index * sizeof(macho::RelocationEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::SymbolTableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::SymbolTableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &Value) {
+ SwapValue(Value.StringIndex);
+ SwapValue(Value.Flags);
+ SwapValue(Value.Value);
+}
+void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
+ unsigned Index,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ uint64_t Offset = (SymbolTableOffset +
+ Index * sizeof(macho::Symbol64TableEntry));
+ ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
+}
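MachOObject is a lower-level wrapper than the COFF and ELF readers: callers drive it by index rather than through iterators. Below is a sketch of walking the load commands using only the API visible in this file (LoadFromBuffer, getHeader(), getLoadCommandInfo()); whether the object takes ownership of the buffer is an assumption here.

    // Hypothetical usage sketch, not part of the commit.
    #include "llvm/Object/MachOObject.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    static void listLoadCommands(MemoryBuffer *Buf) {
      std::string Err;
      MachOObject *Obj = MachOObject::LoadFromBuffer(Buf, &Err);
      if (!Obj) {
        errs() << "error: " << Err << '\n';
        return;
      }
      for (unsigned i = 0, e = Obj->getHeader().NumLoadCommands; i != e; ++i) {
        const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
        outs() << "load command " << i << ": type " << LCI.Command.Type
               << ", size " << LCI.Command.Size << '\n';
      }
      delete Obj; // assumes the object owns Buf
    }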
diff --git a/contrib/llvm/lib/Object/Makefile b/contrib/llvm/lib/Object/Makefile
new file mode 100644
index 0000000..79388dc
--- /dev/null
+++ b/contrib/llvm/lib/Object/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMObject
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
new file mode 100644
index 0000000..161ae3a
--- /dev/null
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -0,0 +1,71 @@
+//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a file format independent ObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+
+using namespace llvm;
+using namespace object;
+
+ObjectFile::ObjectFile(MemoryBuffer *Object)
+ : MapFile(Object) {
+ assert(MapFile && "Must be a valid MemoryBuffer!");
+ base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart());
+}
+
+ObjectFile::~ObjectFile() {
+ delete MapFile;
+}
+
+StringRef ObjectFile::getFilename() const {
+ return MapFile->getBufferIdentifier();
+}
+
+ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
+ if (!Object || Object->getBufferSize() < 64)
+ return 0;
+ sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(),
+ static_cast<unsigned>(Object->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ return createELFObjectFile(Object);
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ return 0;
+ case sys::COFF_FileType:
+ return createCOFFObjectFile(Object);
+ default:
+ llvm_unreachable("Unknown Object File Type");
+ }
+}
+
+ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+ return NULL;
+ return createObjectFile(File.take());
+}
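createObjectFile() is the format-independent entry point: it sniffs the buffer with sys::IdentifyFileType and dispatches to the COFF or ELF factory (Mach-O types are recognized but return 0 for now). A hedged usage sketch follows; getFileFormatName() and getBytesInAddress() are assumed to be public on the ObjectFile base class, as suggested by the readers above.

    // Hypothetical usage sketch, not part of the commit.
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace object;

    static int describe(StringRef Path) {
      ObjectFile *Obj = ObjectFile::createObjectFile(Path);
      if (!Obj) {
        errs() << Path << ": unrecognized or unreadable object file\n";
        return 1;
      }
      outs() << Obj->getFilename() << ": " << Obj->getFileFormatName()
             << ", " << unsigned(Obj->getBytesInAddress()) * 8 << "-bit\n";
      delete Obj;
      return 0;
    }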
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index b87ddf9..e765ba0 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -175,7 +175,7 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
{
int unsignedExponent;
bool negative, overflow;
- int exponent;
+ int exponent = 0;
assert(p != end && "Exponent has no digits");
@@ -194,11 +194,11 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
assert(value < 10U && "Invalid character in exponent");
unsignedExponent = unsignedExponent * 10 + value;
- if (unsignedExponent > 65535)
+ if (unsignedExponent > 32767)
overflow = true;
}
- if (exponentAdjustment > 65535 || exponentAdjustment < -65536)
+ if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
overflow = true;
if (!overflow) {
@@ -206,12 +206,12 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
if (negative)
exponent = -exponent;
exponent += exponentAdjustment;
- if (exponent > 65535 || exponent < -65536)
+ if (exponent > 32767 || exponent < -32768)
overflow = true;
}
if (overflow)
- exponent = negative ? -65536: 65535;
+ exponent = negative ? -32768: 32767;
return exponent;
}
@@ -3197,6 +3197,12 @@ APFloat::initFromAPInt(const APInt& api, bool isIEEE)
llvm_unreachable(0);
}
+APFloat
+APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
+{
+ return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
+}
+
APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
APFloat Val(Sem, fcNormal, Negative);
@@ -3258,14 +3264,12 @@ APFloat::APFloat(const APInt& api, bool isIEEE)
APFloat::APFloat(float f)
{
- APInt api = APInt(32, 0);
- initFromAPInt(api.floatToBits(f));
+ initFromAPInt(APInt::floatToBits(f));
}
APFloat::APFloat(double d)
{
- APInt api = APInt(64, 0);
- initFromAPInt(api.doubleToBits(d));
+ initFromAPInt(APInt::doubleToBits(d));
}
namespace {
@@ -3312,7 +3316,7 @@ namespace {
// Truncate the significand down to its active bit count, but
// don't try to drop below 32.
unsigned newPrecision = std::max(32U, significand.getActiveBits());
- significand.trunc(newPrecision);
+ significand = significand.trunc(newPrecision);
}
@@ -3417,7 +3421,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// Nothing to do.
} else if (exp > 0) {
// Just shift left.
- significand.zext(semantics->precision + exp);
+ significand = significand.zext(semantics->precision + exp);
significand <<= exp;
exp = 0;
} else { /* exp < 0 */
@@ -3436,7 +3440,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// Multiply significand by 5^e.
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
- significand.zext(precision);
+ significand = significand.zext(precision);
APInt five_to_the_i(precision, 5);
while (true) {
if (texp & 1) significand *= five_to_the_i;
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 8a212a2..7703342 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -361,7 +361,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
if (!rhsWords) {
// X * 0 ===> 0
- clear();
+ clearAllBits();
return *this;
}
@@ -373,7 +373,7 @@ APInt& APInt::operator*=(const APInt& RHS) {
mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
// Copy result back into *this
- clear();
+ clearAllBits();
unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
@@ -483,6 +483,7 @@ APInt APInt::operator-(const APInt& RHS) const {
}
bool APInt::operator[](unsigned bitPosition) const {
+ assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
return (maskBit(bitPosition) &
(isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
}
@@ -561,12 +562,12 @@ bool APInt::slt(const APInt& RHS) const {
bool rhsNeg = rhs.isNegative();
if (lhsNeg) {
// Sign bit is set so perform two's complement to make it positive
- lhs.flip();
+ lhs.flipAllBits();
lhs++;
}
if (rhsNeg) {
// Sign bit is set so perform two's complement to make it positive
- rhs.flip();
+ rhs.flipAllBits();
rhs++;
}
@@ -583,22 +584,20 @@ bool APInt::slt(const APInt& RHS) const {
return lhs.ult(rhs);
}
-APInt& APInt::set(unsigned bitPosition) {
+void APInt::setBit(unsigned bitPosition) {
if (isSingleWord())
VAL |= maskBit(bitPosition);
else
pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
- return *this;
}
/// Set the given bit to 0 whose position is given as "bitPosition".
/// @brief Set a given bit to 0.
-APInt& APInt::clear(unsigned bitPosition) {
+void APInt::clearBit(unsigned bitPosition) {
if (isSingleWord())
VAL &= ~maskBit(bitPosition);
else
pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
- return *this;
}
/// @brief Toggle every bit to its opposite value.
@@ -606,11 +605,10 @@ APInt& APInt::clear(unsigned bitPosition) {
/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
/// @brief Toggles a given bit to its opposite value.
-APInt& APInt::flip(unsigned bitPosition) {
+void APInt::flipBit(unsigned bitPosition) {
assert(bitPosition < BitWidth && "Out of the bit-width range!");
- if ((*this)[bitPosition]) clear(bitPosition);
- else set(bitPosition);
- return *this;
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
}
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
@@ -761,10 +759,6 @@ APInt APInt::getLoBits(unsigned numBits) const {
BitWidth - numBits);
}
-bool APInt::isPowerOf2() const {
- return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
-}
-
unsigned APInt::countLeadingZerosSlowCase() const {
// Treat the most significant word differently because it might have
// meaningless bits set beyond the precision.
@@ -1001,96 +995,90 @@ double APInt::roundToDouble(bool isSigned) const {
}
// Truncate to new width.
-APInt &APInt::trunc(unsigned width) {
+APInt APInt::trunc(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
assert(width && "Can't truncate to 0 bits");
- unsigned wordsBefore = getNumWords();
- BitWidth = width;
- unsigned wordsAfter = getNumWords();
- if (wordsBefore != wordsAfter) {
- if (wordsAfter == 1) {
- uint64_t *tmp = pVal;
- VAL = pVal[0];
- delete [] tmp;
- } else {
- uint64_t *newVal = getClearedMemory(wordsAfter);
- for (unsigned i = 0; i < wordsAfter; ++i)
- newVal[i] = pVal[i];
- delete [] pVal;
- pVal = newVal;
- }
- }
- return clearUnusedBits();
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
+ Result.pVal[i] = pVal[i];
+
+ // Truncate and copy any partial word.
+ unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = pVal[i] << bits >> bits;
+
+ return Result;
}
// Sign extend to a new width.
-APInt &APInt::sext(unsigned width) {
+APInt APInt::sext(unsigned width) const {
assert(width > BitWidth && "Invalid APInt SignExtend request");
- // If the sign bit isn't set, this is the same as zext.
- if (!isNegative()) {
- zext(width);
- return *this;
+
+ if (width <= APINT_BITS_PER_WORD) {
+ uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
+ val = (int64_t)val >> (width - BitWidth);
+ return APInt(width, val >> (APINT_BITS_PER_WORD - width));
}
- // The sign bit is set. First, get some facts
- unsigned wordsBefore = getNumWords();
- unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
- BitWidth = width;
- unsigned wordsAfter = getNumWords();
-
- // Mask the high order word appropriately
- if (wordsBefore == wordsAfter) {
- unsigned newWordBits = width % APINT_BITS_PER_WORD;
- // The extension is contained to the wordsBefore-1th word.
- uint64_t mask = ~0ULL;
- if (newWordBits)
- mask >>= APINT_BITS_PER_WORD - newWordBits;
- mask <<= wordBits;
- if (wordsBefore == 1)
- VAL |= mask;
- else
- pVal[wordsBefore-1] |= mask;
- return clearUnusedBits();
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
+ unsigned i;
+ uint64_t word = 0;
+ for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
+ word = getRawData()[i];
+ Result.pVal[i] = word;
}
- uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
- uint64_t *newVal = getMemory(wordsAfter);
- if (wordsBefore == 1)
- newVal[0] = VAL | mask;
- else {
- for (unsigned i = 0; i < wordsBefore; ++i)
- newVal[i] = pVal[i];
- newVal[wordsBefore-1] |= mask;
+ // Read and sign-extend any partial word.
+ unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ word = (int64_t)getRawData()[i] << bits >> bits;
+ else
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+
+ // Write remaining full words.
+ for (; i != width / APINT_BITS_PER_WORD; i++) {
+ Result.pVal[i] = word;
+ word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
}
- for (unsigned i = wordsBefore; i < wordsAfter; i++)
- newVal[i] = -1ULL;
- if (wordsBefore != 1)
- delete [] pVal;
- pVal = newVal;
- return clearUnusedBits();
+
+ // Write any partial word.
+ bits = (0 - width) % APINT_BITS_PER_WORD;
+ if (bits != 0)
+ Result.pVal[i] = word << bits >> bits;
+
+ return Result;
}
// Zero extend to a new width.
-APInt &APInt::zext(unsigned width) {
+APInt APInt::zext(unsigned width) const {
assert(width > BitWidth && "Invalid APInt ZeroExtend request");
- unsigned wordsBefore = getNumWords();
- BitWidth = width;
- unsigned wordsAfter = getNumWords();
- if (wordsBefore != wordsAfter) {
- uint64_t *newVal = getClearedMemory(wordsAfter);
- if (wordsBefore == 1)
- newVal[0] = VAL;
- else
- for (unsigned i = 0; i < wordsBefore; ++i)
- newVal[i] = pVal[i];
- if (wordsBefore != 1)
- delete [] pVal;
- pVal = newVal;
- }
- return *this;
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, VAL);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy words.
+ unsigned i;
+ for (i = 0; i != getNumWords(); i++)
+ Result.pVal[i] = getRawData()[i];
+
+ // Zero remaining words.
+ memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+
+ return Result;
}
-APInt &APInt::zextOrTrunc(unsigned width) {
+APInt APInt::zextOrTrunc(unsigned width) const {
if (BitWidth < width)
return zext(width);
if (BitWidth > width)
@@ -1098,7 +1086,7 @@ APInt &APInt::zextOrTrunc(unsigned width) {
return *this;
}
-APInt &APInt::sextOrTrunc(unsigned width) {
+APInt APInt::sextOrTrunc(unsigned width) const {
if (BitWidth < width)
return sext(width);
if (BitWidth > width)
@@ -1873,7 +1861,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
if (!Quotient->isSingleWord())
Quotient->pVal = getClearedMemory(Quotient->getNumWords());
} else
- Quotient->clear();
+ Quotient->clearAllBits();
// The quotient is in Q. Reconstitute the quotient into Quotient's low
// order words.
@@ -1904,7 +1892,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
if (!Remainder->isSingleWord())
Remainder->pVal = getClearedMemory(Remainder->getNumWords());
} else
- Remainder->clear();
+ Remainder->clearAllBits();
// The remainder is in R. Reconstitute the remainder into Remainder's low
// order words.
@@ -2046,6 +2034,64 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
}
+APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this+RHS;
+ Overflow = Res.ult(RHS);
+ return Res;
+}
+
+APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this - RHS;
+ Overflow = isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != isNonNegative();
+ return Res;
+}
+
+APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this-RHS;
+ Overflow = Res.ugt(*this);
+ return Res;
+}
+
+APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
+ // MININT/-1 --> overflow.
+ Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ return sdiv(RHS);
+}
+
+APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
+ APInt Res = *this * RHS;
+
+ if (*this != 0 && RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ else
+ Overflow = false;
+ return Res;
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
+ if (Overflow)
+ ShAmt = getBitWidth()-1;
+
+ if (isNonNegative()) // Don't allow sign change.
+ Overflow = ShAmt >= countLeadingZeros();
+ else
+ Overflow = ShAmt >= countLeadingOnes();
+
+ return *this << ShAmt;
+}
+
+
+
+
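
The new *_ov helpers above pair each arithmetic result with an overflow flag. A small usage sketch, assuming only APInt from llvm/ADT/APInt.h (the values are illustrative):

  #include "llvm/ADT/APInt.h"
  #include <cassert>
  using llvm::APInt;

  void demo() {
    APInt A(8, 100), B(8, 100);        // two i8 values
    bool Overflow = false;
    APInt S = A.sadd_ov(B, Overflow);  // 100 + 100 wraps to -56 as a signed i8
    assert(Overflow && S.getSExtValue() == -56);

    APInt U = A.uadd_ov(B, Overflow);  // 200 fits in an unsigned i8
    assert(!Overflow && U.getZExtValue() == 200);
  }
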
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Check our assumptions here
assert(!str.empty() && "Invalid string length");
@@ -2101,7 +2147,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// If its negative, put it in two's complement form
if (isNeg) {
(*this)--;
- this->flip();
+ this->flipAllBits();
}
}
@@ -2149,7 +2195,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// They want to print the signed version and it is a negative value
// Flip the bits and add one to turn it into the equivalent positive
// value and put a '-' in the result.
- Tmp.flip();
+ Tmp.flipAllBits();
Tmp++;
Str.push_back('-');
}
diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp
index 90df262..5e27df6 100644
--- a/contrib/llvm/lib/Support/Allocator.cpp
+++ b/contrib/llvm/lib/Support/Allocator.cpp
@@ -12,10 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Recycler.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
#include <cstring>
namespace llvm {
@@ -44,6 +44,12 @@ char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) {
/// StartNewSlab - Allocate a new slab and move the bump pointers over into
/// the new slab. Modifies CurPtr and End.
void BumpPtrAllocator::StartNewSlab() {
+ // If we allocated a big number of slabs already it's likely that we're going
+ // to allocate more. Increase slab size to reduce mallocs and possibly memory
+ // overhead. The factors are chosen conservatively to avoid overallocation.
+ if (BytesAllocated >= SlabSize * 128)
+ SlabSize *= 2;
+
MemSlab *NewSlab = Allocator.Allocate(SlabSize);
NewSlab->NextPtr = CurSlab;
CurSlab = NewSlab;
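
The heuristic added above doubles SlabSize once the allocator has handed out 128 slabs' worth of bytes, so long-lived allocators make progressively fewer malloc calls. A standalone re-check of that growth, assuming the default 4096-byte slab (the numbers are illustrative only):

  #include <cstdio>
  #include <cstddef>

  int main() {
    std::size_t SlabSize = 4096;             // BumpPtrAllocator's default slab size
    std::size_t BytesAllocated = 0;
    for (int Slab = 1; Slab <= 400; ++Slab) {
      if (BytesAllocated >= SlabSize * 128)  // same test as the new StartNewSlab() code
        SlabSize *= 2;
      BytesAllocated += SlabSize;
      if (Slab % 100 == 0)
        std::printf("after %d slabs: SlabSize = %zu bytes\n", Slab, SlabSize);
    }
    return 0;
  }
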
diff --git a/contrib/llvm/lib/System/Atomic.cpp b/contrib/llvm/lib/Support/Atomic.cpp
index 7ba8b77..c7b4bff 100644
--- a/contrib/llvm/lib/System/Atomic.cpp
+++ b/contrib/llvm/lib/Support/Atomic.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Atomic.h"
+#include "llvm/Support/Atomic.h"
#include "llvm/Config/config.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index ae66110..7e74499 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -22,9 +22,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/System/Host.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Path.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
@@ -179,6 +180,45 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
return I->second;
}
+/// LookupNearestOption - Look up the registered option whose name is closest
+/// (by edit distance) to the option name given on the command line. If there
+/// is a value specified (after an equal sign) return that as well. This
+/// assumes that leading dashes have already been stripped.
+static Option *LookupNearestOption(StringRef Arg,
+ const StringMap<Option*> &OptionsMap,
+ const char *&NearestString) {
+ // Reject all dashes.
+ if (Arg.empty()) return 0;
+
+ // Split on any equal sign.
+ StringRef LHS = Arg.split('=').first;
+
+ // Find the closest match.
+ Option *Best = 0;
+ unsigned BestDistance = 0;
+ for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
+ ie = OptionsMap.end(); it != ie; ++it) {
+ Option *O = it->second;
+ SmallVector<const char*, 16> OptionNames;
+ O->getExtraOptionNames(OptionNames);
+ if (O->ArgStr[0])
+ OptionNames.push_back(O->ArgStr);
+
+ for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+ StringRef Name = OptionNames[i];
+ unsigned Distance = StringRef(Name).edit_distance(
+ Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance);
+ if (!Best || Distance < BestDistance) {
+ Best = O;
+ NearestString = OptionNames[i];
+ BestDistance = Distance;
+ }
+ }
+ }
+
+ return Best;
+}
+
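
Together with the diagnostic added further down in ParseCommandLineOptions, this helper makes a mistyped flag suggest the closest registered option. A hypothetical tool that would exercise it, assuming only cl::opt and cl::ParseCommandLineOptions from llvm/Support/CommandLine.h:

  #include "llvm/Support/CommandLine.h"
  namespace cl = llvm::cl;

  static cl::opt<bool> Verbose("verbose", cl::desc("Print extra output"));

  int main(int argc, char **argv) {
    // Running the tool as `./demo -verbos` now prints, after the usual
    // "Unknown command line argument" error:
    //   demo: Did you mean '-verbose'?
    cl::ParseCommandLineOptions(argc, argv, "nearest-option demo\n");
    return Verbose ? 0 : 1;
  }
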
/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurence() that
/// does special handling of cl::CommaSeparated options.
static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
@@ -463,10 +503,6 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
const sys::FileStatus *FileStat = respFile.getFileStatus();
if (FileStat && FileStat->getSize() != 0) {
- // Mmap the response file into memory.
- OwningPtr<MemoryBuffer>
- respFilePtr(MemoryBuffer::getFile(respFile.c_str()));
-
// If we could open the file, parse its contents, otherwise
// pass the @file option verbatim.
@@ -475,7 +511,9 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
// itself contain additional @file options; any such options will be
// processed recursively.")
- if (respFilePtr != 0) {
+ // Mmap the response file into memory.
+ OwningPtr<MemoryBuffer> respFilePtr;
+ if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
ParseCStringVector(newArgv, respFilePtr->getBufferStart());
continue;
}
@@ -506,7 +544,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
}
// Copy the program name into ProgName, making sure not to overflow it.
- std::string ProgName = sys::Path(argv[0]).getLast();
+ std::string ProgName = sys::path::filename(argv[0]);
size_t Len = std::min(ProgName.size(), size_t(79));
memcpy(ProgramName, ProgName.data(), Len);
ProgramName[Len] = '\0';
@@ -572,6 +610,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
bool DashDashFound = false; // Have we read '--'?
for (int i = 1; i < argc; ++i) {
Option *Handler = 0;
+ Option *NearestHandler = 0;
+ const char *NearestHandlerString = 0;
StringRef Value;
StringRef ArgName = "";
@@ -645,12 +685,25 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
if (Handler == 0)
Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
ErrorParsing, Opts);
+
+ // Otherwise, look for the closest available option to report to the user
+ // in the upcoming error.
+ if (Handler == 0 && SinkOpts.empty())
+ NearestHandler = LookupNearestOption(ArgName, Opts,
+ NearestHandlerString);
}
if (Handler == 0) {
if (SinkOpts.empty()) {
errs() << ProgramName << ": Unknown command line argument '"
<< argv[i] << "'. Try: '" << argv[0] << " -help'\n";
+
+ if (NearestHandler) {
+ // If we know a near match, report it as well.
+ errs() << ProgramName << ": Did you mean '-"
+ << NearestHandlerString << "'?\n";
+ }
+
ErrorParsing = true;
} else {
for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
@@ -765,6 +818,15 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
}
}
+ // Now that we know if -debug is specified, we can use it.
+ // Note that if ReadResponseFiles == true, this must be done before the
+ // memory allocated for the expanded command line is free()d below.
+ DEBUG(dbgs() << "Args: ";
+ for (int i = 0; i < argc; ++i)
+ dbgs() << argv[i] << ' ';
+ dbgs() << '\n';
+ );
+
// Free all of the memory allocated to the map. Command line options may only
// be processed once!
Opts.clear();
@@ -779,12 +841,6 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
free(*i);
}
- DEBUG(dbgs() << "Args: ";
- for (int i = 0; i < argc; ++i)
- dbgs() << argv[i] << ' ';
- dbgs() << '\n';
- );
-
// If we had an error processing our arguments, don't let the program execute
if (ErrorParsing) exit(1);
}
diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp
index 8ef3785..493f708 100644
--- a/contrib/llvm/lib/Support/ConstantRange.cpp
+++ b/contrib/llvm/lib/Support/ConstantRange.cpp
@@ -51,6 +51,9 @@ ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
const ConstantRange &CR) {
+ if (CR.isEmptySet())
+ return CR;
+
uint32_t W = CR.getBitWidth();
switch (Pred) {
default: assert(!"Invalid ICmp predicate to makeICmpRegion()");
@@ -60,10 +63,18 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
if (CR.isSingleElement())
return ConstantRange(CR.getUpper(), CR.getLower());
return ConstantRange(W);
- case ICmpInst::ICMP_ULT:
- return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax());
- case ICmpInst::ICMP_SLT:
- return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax());
+ case ICmpInst::ICMP_ULT: {
+ APInt UMax(CR.getUnsignedMax());
+ if (UMax.isMinValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getMinValue(W), UMax);
+ }
+ case ICmpInst::ICMP_SLT: {
+ APInt SMax(CR.getSignedMax());
+ if (SMax.isMinSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(APInt::getSignedMinValue(W), SMax);
+ }
case ICmpInst::ICMP_ULE: {
APInt UMax(CR.getUnsignedMax());
if (UMax.isMaxValue())
@@ -72,15 +83,22 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
}
case ICmpInst::ICMP_SLE: {
APInt SMax(CR.getSignedMax());
- if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue())
+ if (SMax.isMaxSignedValue())
return ConstantRange(W);
return ConstantRange(APInt::getSignedMinValue(W), SMax + 1);
}
- case ICmpInst::ICMP_UGT:
- return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W));
- case ICmpInst::ICMP_SGT:
- return ConstantRange(CR.getSignedMin() + 1,
- APInt::getSignedMinValue(W));
+ case ICmpInst::ICMP_UGT: {
+ APInt UMin(CR.getUnsignedMin());
+ if (UMin.isMaxValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(UMin + 1, APInt::getNullValue(W));
+ }
+ case ICmpInst::ICMP_SGT: {
+ APInt SMin(CR.getSignedMin());
+ if (SMin.isMaxSignedValue())
+ return ConstantRange(W, /* empty */ false);
+ return ConstantRange(SMin + 1, APInt::getSignedMinValue(W));
+ }
case ICmpInst::ICMP_UGE: {
APInt UMin(CR.getUnsignedMin());
if (UMin.isMinValue())
@@ -115,6 +133,14 @@ bool ConstantRange::isWrappedSet() const {
return Lower.ugt(Upper);
}
+/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of
+/// its bitwidth, for example: i8 [120, 140).
+///
+bool ConstantRange::isSignWrappedSet() const {
+ return contains(APInt::getSignedMaxValue(getBitWidth())) &&
+ contains(APInt::getSignedMinValue(getBitWidth()));
+}
+
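
Two of the changes above are easy to check by hand: makeICmpRegion now returns an explicitly empty range when no value can satisfy the predicate (e.g. x <u 0), and the new isSignWrappedSet() detects ranges such as i8 [120, 140) that straddle INT_MIN. A hedged sketch against the post-change headers (values are illustrative):

  #include "llvm/Support/ConstantRange.h"
  #include "llvm/ADT/APInt.h"
  #include "llvm/InstrTypes.h"   // for CmpInst::ICMP_ULT
  #include <cassert>
  using namespace llvm;

  void demo() {
    // No i8 value is unsigned-less-than 0, so the region for ICMP_ULT of {0}
    // is now the empty set rather than a degenerate [0, 0) range.
    ConstantRange Zero(APInt(8, 0));
    ConstantRange R = ConstantRange::makeICmpRegion(CmpInst::ICMP_ULT, Zero);
    assert(R.isEmptySet());

    // i8 [120, 140) contains both 127 (signed max) and 128 (== -128 signed),
    // so it is sign-wrapped.
    ConstantRange Wrap(APInt(8, 120), APInt(8, 140));
    assert(Wrap.isSignWrappedSet());
  }
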
/// getSetSize - Return the number of elements in this set.
///
APInt ConstantRange::getSetSize() const {
@@ -408,15 +434,15 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
/// correspond to the possible range of values as if the source range had been
/// zero extended.
ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
unsigned SrcTySize = getBitWidth();
assert(SrcTySize < DstTySize && "Not a value extension");
- if (isFullSet())
- // Change a source full set into [0, 1 << 8*numbytes)
+ if (isFullSet() || isWrappedSet())
+ // Change into [0, 1 << src bit width)
return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize));
- APInt L = Lower; L.zext(DstTySize);
- APInt U = Upper; U.zext(DstTySize);
- return ConstantRange(L, U);
+ return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize));
}
/// signExtend - Return a new range in the specified integer type, which must
@@ -424,16 +450,16 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
/// correspond to the possible range of values as if the source range had been
/// sign extended.
ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
+ if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+
unsigned SrcTySize = getBitWidth();
assert(SrcTySize < DstTySize && "Not a value extension");
- if (isFullSet()) {
+ if (isFullSet() || isSignWrappedSet()) {
return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1),
APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1);
}
- APInt L = Lower; L.sext(DstTySize);
- APInt U = Upper; U.sext(DstTySize);
- return ConstantRange(L, U);
+ return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize));
}
/// truncate - Return a new range in the specified integer type, which must be
@@ -447,9 +473,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
if (isFullSet() || getSetSize().ugt(Size))
return ConstantRange(DstTySize, /*isFullSet=*/true);
- APInt L = Lower; L.trunc(DstTySize);
- APInt U = Upper; U.trunc(DstTySize);
- return ConstantRange(L, U);
+ return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
}
/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
@@ -596,6 +620,32 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
}
ConstantRange
+ConstantRange::binaryAnd(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
+ if (umin.isAllOnesValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1);
+}
+
+ConstantRange
+ConstantRange::binaryOr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ // TODO: replace this with something less conservative
+
+ APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
+ if (umax.isMinValue())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return ConstantRange(umax, APInt::getNullValue(getBitWidth()));
+}
+
+ConstantRange
ConstantRange::shl(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return ConstantRange(getBitWidth(), /*isFullSet=*/false);
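
The new binaryAnd/binaryOr above are deliberately coarse: an AND can never exceed the smaller operand's unsigned maximum, and an OR can never drop below the larger operand's unsigned minimum, so the results are just [0, umin] and [umax, UINT_MAX]. A small check of that bound, using the same headers as the previous sketch (illustrative values):

  #include "llvm/Support/ConstantRange.h"
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  using namespace llvm;

  void demo() {
    ConstantRange A(APInt(8, 0), APInt(8, 16));   // [0, 16)  -> unsigned max 15
    ConstantRange B(APInt(8, 0), APInt(8, 100));  // [0, 100) -> unsigned max 99
    // AND of any pair from A and B cannot exceed 15, so the approximation is [0, 15].
    ConstantRange R = A.binaryAnd(B);
    assert(R.getUnsignedMin() == 0 && R.getUnsignedMax() == 15);
  }
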
diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
index 49258ede..bf8ca3f 100644
--- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -10,8 +10,8 @@
#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/ThreadLocal.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
#include <setjmp.h>
#include <cstdio>
using namespace llvm;
@@ -128,6 +128,9 @@ static void CrashRecoverySignalHandler(int Signal) {
// This call of Disable isn't thread safe, but it doesn't actually matter.
CrashRecoveryContext::Disable();
raise(Signal);
+
+  // The signal will be delivered once the signal mask is restored.
+ return;
}
// Unblock the signal we received.
@@ -202,3 +205,26 @@ const std::string &CrashRecoveryContext::getBacktrace() const {
assert(CRC->Failed && "No crash was detected!");
return CRC->Backtrace;
}
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+ void (*UserFn)(void*);
+ void *UserData;
+ CrashRecoveryContext *CRC;
+ bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+ RunSafelyOnThreadInfo *Info =
+ reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+ Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+ llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+ return Info.Result;
+}
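
RunSafelyOnThread above simply forwards RunSafely() to llvm_execute_on_thread, so a crash in the callback is contained on a dedicated thread, optionally with a larger stack. A hedged usage sketch; the callback and stack size are made up for illustration:

  #include "llvm/Support/CrashRecoveryContext.h"
  using namespace llvm;

  static void DeepRecursion(void *Data) {
    // ... work that might overflow the default stack or crash ...
  }

  bool runGuarded() {
    CrashRecoveryContext::Enable();
    CrashRecoveryContext CRC;
    // Run on a fresh thread with an 8 MiB stack; returns false if the
    // callback crashed instead of taking the whole process down.
    return CRC.RunSafelyOnThread(DeepRecursion, 0, 8 << 20);
  }
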
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
index 7f48f8a..9fdb12e 100644
--- a/contrib/llvm/lib/Support/Debug.cpp
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/circular_raw_ostream.h"
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/System/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp
index 139e3be..6362aff 100644
--- a/contrib/llvm/lib/System/Disassembler.cpp
+++ b/contrib/llvm/lib/Support/Disassembler.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "llvm/System/Disassembler.h"
+#include "llvm/Support/Disassembler.h"
#include <cassert>
#include <iomanip>
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
index 96ce9d3..9799ef5 100644
--- a/contrib/llvm/lib/Support/Dwarf.cpp
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -78,6 +78,10 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
case DW_TAG_shared_type: return "DW_TAG_shared_type";
case DW_TAG_lo_user: return "DW_TAG_lo_user";
case DW_TAG_hi_user: return "DW_TAG_hi_user";
+ case DW_TAG_auto_variable: return "DW_TAG_auto_variable";
+ case DW_TAG_arg_variable: return "DW_TAG_arg_variable";
+ case DW_TAG_return_variable: return "DW_TAG_return_variable";
+ case DW_TAG_vector_type: return "DW_TAG_vector_type";
}
return 0;
}
diff --git a/contrib/llvm/lib/System/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp
index 660db49..455c380 100644
--- a/contrib/llvm/lib/System/DynamicLibrary.cpp
+++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp
@@ -14,7 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/Config/config.h"
#include <cstdio>
#include <cstring>
@@ -46,7 +47,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
#ifdef LLVM_ON_WIN32
-#include "Win32/DynamicLibrary.inc"
+#include "Windows/DynamicLibrary.inc"
#else
@@ -63,6 +64,12 @@ using namespace llvm::sys;
static std::vector<void *> *OpenedHandles = 0;
+static SmartMutex<true>& getMutex() {
+ static SmartMutex<true> HandlesMutex;
+ return HandlesMutex;
+}
+
+
bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
std::string *ErrMsg) {
void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
@@ -76,6 +83,7 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
if (Filename == NULL)
H = RTLD_DEFAULT;
#endif
+ SmartScopedLock<true> Lock(getMutex());
if (OpenedHandles == 0)
OpenedHandles = new std::vector<void *>();
OpenedHandles->push_back(H);
@@ -103,13 +111,14 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
std::map<std::string, void *>::iterator I =
ExplicitSymbols->find(symbolName);
std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
-
+
if (I != E)
return I->second;
}
#if HAVE_DLFCN_H
// Now search the libraries.
+ SmartScopedLock<true> Lock(getMutex());
if (OpenedHandles) {
for (std::vector<void *>::iterator I = OpenedHandles->begin(),
E = OpenedHandles->end(); I != E; ++I) {
@@ -130,7 +139,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
if (!strcmp(symbolName, #SYM)) return &SYM
// On linux we have a weird situation. The stderr/out/in symbols are both
-// macros and global variables because of standards requirements. So, we
+// macros and global variables because of standards requirements. So, we
// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
#if defined(__linux__)
{
diff --git a/contrib/llvm/lib/System/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp
index 68f66f6..18c6581 100644
--- a/contrib/llvm/lib/System/Errno.cpp
+++ b/contrib/llvm/lib/Support/Errno.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Errno.h"
+#include "llvm/Support/Errno.h"
#include "llvm/Config/config.h" // Get autoconf configuration settings
#if HAVE_STRING_H
@@ -50,7 +50,7 @@ std::string StrError(int errnum) {
# else
strerror_r(errnum,buffer,MaxErrStrLen-1);
# endif
-#elif defined(HAVE_STRERROR_S) // Windows.
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
if (errnum)
strerror_s(buffer, errnum);
#elif defined(HAVE_STRERROR)
diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp
index 0b7af3e..3579546 100644
--- a/contrib/llvm/lib/Support/ErrorHandling.cpp
+++ b/contrib/llvm/lib/Support/ErrorHandling.cpp
@@ -16,8 +16,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/config.h"
#include <cassert>
@@ -58,6 +58,10 @@ void llvm::report_fatal_error(const std::string &Reason) {
report_fatal_error(Twine(Reason));
}
+void llvm::report_fatal_error(StringRef Reason) {
+ report_fatal_error(Twine(Reason));
+}
+
void llvm::report_fatal_error(const Twine &Reason) {
if (ErrorHandler) {
ErrorHandler(ErrorHandlerUserData, Reason.str());
@@ -69,7 +73,8 @@ void llvm::report_fatal_error(const Twine &Reason) {
raw_svector_ostream OS(Buffer);
OS << "LLVM ERROR: " << Reason << "\n";
StringRef MessageStr = OS.str();
- (void)::write(2, MessageStr.data(), MessageStr.size());
+ ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+ (void)written; // If something went wrong, we deliberately just give up.
}
// If we reached here, we are failing ungracefully. Run the interrupt handlers
diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp
index 1bde2fe..5dbabee 100644
--- a/contrib/llvm/lib/Support/FileUtilities.cpp
+++ b/contrib/llvm/lib/Support/FileUtilities.cpp
@@ -15,7 +15,8 @@
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include <cstdlib>
@@ -108,17 +109,17 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P,
SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
// Strange exponential notation!
StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
-
+
V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
}
-
+
if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
// Copy string into tmp buffer to replace the 'D' with an 'e'.
SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
// Strange exponential notation!
StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
-
+
V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
}
@@ -199,11 +200,20 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
// Now it's safe to mmap the files into memory because both files
// have a non-zero size.
- OwningPtr<MemoryBuffer> F1(MemoryBuffer::getFile(FileA.c_str(), Error));
- OwningPtr<MemoryBuffer> F2(MemoryBuffer::getFile(FileB.c_str(), Error));
- if (F1 == 0 || F2 == 0)
+ error_code ec;
+ OwningPtr<MemoryBuffer> F1;
+ if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+ if (Error)
+ *Error = ec.message();
return 2;
-
+ }
+ OwningPtr<MemoryBuffer> F2;
+ if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+ if (Error)
+ *Error = ec.message();
+ return 2;
+ }
+
// Okay, now that we opened the files, scan them for the first difference.
const char *File1Start = F1->getBufferStart();
const char *File2Start = F2->getBufferStart();
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index 29b5952..a4f80a9 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
#include <cassert>
#include <cstring>
using namespace llvm;
@@ -110,18 +111,32 @@ void FoldingSetNodeID::AddString(StringRef String) {
Pos = (Units + 1) * 4;
} else {
// Otherwise do it the hard way.
- for (Pos += 4; Pos <= Size; Pos += 4) {
- unsigned V = ((unsigned char)String[Pos - 4] << 24) |
- ((unsigned char)String[Pos - 3] << 16) |
- ((unsigned char)String[Pos - 2] << 8) |
- (unsigned char)String[Pos - 1];
- Bits.push_back(V);
+ // To be compatible with above bulk transfer, we need to take endianness
+ // into account.
+ if (sys::isBigEndianHost()) {
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+ ((unsigned char)String[Pos - 3] << 16) |
+ ((unsigned char)String[Pos - 2] << 8) |
+ (unsigned char)String[Pos - 1];
+ Bits.push_back(V);
+ }
+ } else {
+ assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+ for (Pos += 4; Pos <= Size; Pos += 4) {
+ unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+ ((unsigned char)String[Pos - 2] << 16) |
+ ((unsigned char)String[Pos - 3] << 8) |
+ (unsigned char)String[Pos - 4];
+ Bits.push_back(V);
+ }
}
}
// With the leftover bits.
unsigned V = 0;
- // Pos will have overshot size by 4 - #bytes left over.
+ // Pos will have overshot size by 4 - #bytes left over.
+ // No need to take endianness into account here - this is always executed.
switch (Pos - Size) {
case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
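
The byte-by-byte path above has to produce the same 32-bit units as the bulk word transfer earlier in AddString, which hashes host-endian words read straight out of the string. A tiny standalone check of why the little-endian branch reverses the byte order (plain C++, independent of LLVM):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    const unsigned char S[4] = {'a', 'b', 'c', 'd'};
    uint32_t Bulk;
    std::memcpy(&Bulk, S, 4);                      // what the word-at-a-time path hashes
    uint32_t LE = (uint32_t(S[3]) << 24) | (uint32_t(S[2]) << 16) |
                  (uint32_t(S[1]) << 8)  |  uint32_t(S[0]);
    // On a little-endian host the reversed assembly matches the bulk load;
    // big-endian hosts keep the original big-endian assembly instead.
    assert(Bulk == LE);                            // holds on little-endian hosts
    return 0;
  }
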
diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp
index c72b5a1..231ae48 100644
--- a/contrib/llvm/lib/Support/FormattedStream.cpp
+++ b/contrib/llvm/lib/Support/FormattedStream.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp
index fdd6285..0dba28a 100644
--- a/contrib/llvm/lib/Support/GraphWriter.cpp
+++ b/contrib/llvm/lib/Support/GraphWriter.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/GraphWriter.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
@@ -63,11 +63,37 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(0);
errs() << "Running 'Graphviz' program... ";
- if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg
- << "\n";
- else
- Filename.eraseFromDisk();
+ if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+
+#elif HAVE_XDOT_PY
+ std::vector<const char*> args;
+ args.push_back(LLVM_PATH_XDOT_PY);
+ args.push_back(Filename.c_str());
+
+ switch (program) {
+ case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
+ case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
+ case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+ case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+ case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+ default: errs() << "Unknown graph layout name; using default.\n";
+ }
+
+ args.push_back(0);
+
+ errs() << "Running 'xdot.py' program... ";
+ if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
+ &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
HAVE_TWOPI || HAVE_CIRCO))
@@ -128,8 +154,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
errs() << "Running '" << prog.str() << "' program... ";
if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
- errs() << "Error viewing graph " << Filename.str() << ": '"
- << ErrMsg << "\n";
+ errs() << "Error: " << ErrMsg << "\n";
return;
}
errs() << " done. \n";
@@ -144,7 +169,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
ErrMsg.clear();
if (wait) {
if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error viewing graph: " << ErrMsg << "\n";
+ errs() << "Error: " << ErrMsg << "\n";
Filename.eraseFromDisk();
PSFilename.eraseFromDisk();
}
@@ -163,8 +188,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
errs() << "Running 'dotty' program... ";
if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error viewing graph " << Filename.str() << ": "
- << ErrMsg << "\n";
+ errs() << "Error: " << ErrMsg << "\n";
} else {
// Dotty spawns another app and doesn't wait until it returns
#if defined (__MINGW32__) || defined (_WINDOWS)
diff --git a/contrib/llvm/lib/System/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index e7193db..4dacf96 100644
--- a/contrib/llvm/lib/System/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Config/config.h"
#include <string.h>
@@ -20,7 +20,7 @@
#include "Unix/Host.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/Host.inc"
+#include "Windows/Host.inc"
#endif
#ifdef _MSC_VER
#include <intrin.h>
@@ -92,7 +92,8 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
return true;
}
-static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
Family = (EAX >> 8) & 0xf; // Bits 8 - 11
Model = (EAX >> 4) & 0xf; // Bits 4 - 7
if (Family == 6 || Family == 0xf) {
@@ -112,9 +113,9 @@ std::string sys::getHostCPUName() {
unsigned Model = 0;
DetectX86FamilyModel(EAX, Family, Model);
+ bool HasSSE3 = (ECX & 0x1);
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
- bool HasSSE3 = (ECX & 0x1);
union {
unsigned u[3];
@@ -128,21 +129,21 @@ std::string sys::getHostCPUName() {
return "i386";
case 4:
switch (Model) {
- case 0: // Intel486TM DX processors
- case 1: // Intel486TM DX processors
+ case 0: // Intel486 DX processors
+ case 1: // Intel486 DX processors
case 2: // Intel486 SX processors
- case 3: // Intel487TM processors, IntelDX2 OverDrive® processors,
- // IntelDX2TM processors
+ case 3: // Intel487 processors, IntelDX2 OverDrive processors,
+ // IntelDX2 processors
case 4: // Intel486 SL processor
- case 5: // IntelSX2TM processors
+ case 5: // IntelSX2 processors
case 7: // Write-Back Enhanced IntelDX2 processors
- case 8: // IntelDX4 OverDrive processors, IntelDX4TM processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4 processors
default: return "i486";
}
case 5:
switch (Model) {
case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
- // Pentium® processors (60, 66)
+ // Pentium processors (60, 66)
case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
// 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
// 150, 166, 200)
@@ -150,9 +151,9 @@ std::string sys::getHostCPUName() {
// systems
return "pentium";
- case 4: // Pentium OverDrive processor with MMXTM technology for Pentium
+ case 4: // Pentium OverDrive processor with MMX technology for Pentium
// processor (75, 90, 100, 120, 133), Pentium processor with
- // MMXTM technology (166, 200)
+ // MMX technology (166, 200)
return "pentium-mmx";
default: return "pentium";
@@ -165,7 +166,7 @@ std::string sys::getHostCPUName() {
case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
// model 03
case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
- // model 05, and Intel® Celeron® processor, model 05
+ // model 05, and Intel Celeron processor, model 05
case 6: // Celeron processor, model 06
return "pentium2";
@@ -182,13 +183,13 @@ std::string sys::getHostCPUName() {
// 0Dh. All processors are manufactured using the 90 nm process.
return "pentium-m";
- case 14: // Intel CoreTM Duo processor, Intel CoreTM Solo processor, model
+ case 14: // Intel Core Duo processor, Intel Core Solo processor, model
// 0Eh. All processors are manufactured using the 65 nm process.
return "yonah";
- case 15: // Intel CoreTM2 Duo processor, Intel CoreTM2 Duo mobile
- // processor, Intel CoreTM2 Quad processor, Intel CoreTM2 Quad
- // mobile processor, Intel CoreTM2 Extreme processor, Intel
+ case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+ // mobile processor, Intel Core 2 Extreme processor, Intel
// Pentium Dual-Core processor, Intel Xeon processor, model
// 0Fh. All processors are manufactured using the 65 nm process.
case 22: // Intel Celeron processor model 16h. All processors are
@@ -199,7 +200,7 @@ std::string sys::getHostCPUName() {
// Integrated Processor with Intel QuickAssist Technology
return "i686"; // FIXME: ???
- case 23: // Intel CoreTM2 Extreme processor, Intel Xeon processor, model
+ case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
// 17h. All processors are manufactured using the 45 nm process.
//
// 45nm: Penryn , Wolfdale, Yorkfield (XE)
@@ -209,6 +210,9 @@ std::string sys::getHostCPUName() {
// processors are manufactured using the 45 nm process.
case 29: // Intel Xeon processor MP. All processors are manufactured using
// the 45 nm process.
+ case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
+ // As found in a Summer 2010 model iMac.
+ case 37: // Intel Core i7, laptop version.
return "corei7";
case 28: // Intel Atom processor. All processors are manufactured using
@@ -224,7 +228,7 @@ std::string sys::getHostCPUName() {
case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
// processor MP, and Intel Celeron processor. All processors are
// model 01h and manufactured using the 0.18 micron process.
- case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor – M,
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M,
// Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
// processor, and Mobile Intel Celeron processor. All processors
// are model 02h and manufactured using the 0.13 micron process.
@@ -277,14 +281,12 @@ std::string sys::getHostCPUName() {
default: return "athlon";
}
case 15:
- if (HasSSE3) {
+ if (HasSSE3)
return "k8-sse3";
- } else {
- switch (Model) {
- case 1: return "opteron";
- case 5: return "athlon-fx"; // also opteron
- default: return "athlon64";
- }
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
}
case 16:
return "amdfam10";
diff --git a/contrib/llvm/lib/System/IncludeFile.cpp b/contrib/llvm/lib/Support/IncludeFile.cpp
index 8258d40..5da8826 100644
--- a/contrib/llvm/lib/System/IncludeFile.cpp
+++ b/contrib/llvm/lib/Support/IncludeFile.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/IncludeFile.h"
+#include "llvm/Support/IncludeFile.h"
using namespace llvm;
// This constructor is used to ensure linking of other modules. See the
-// llvm/System/IncludeFile.h header for details.
+// llvm/Support/IncludeFile.h header for details.
IncludeFile::IncludeFile(const void*) {}
diff --git a/contrib/llvm/lib/Support/IntEqClasses.cpp b/contrib/llvm/lib/Support/IntEqClasses.cpp
new file mode 100644
index 0000000..1134495
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntEqClasses.cpp
@@ -0,0 +1,70 @@
+//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntEqClasses.h"
+
+using namespace llvm;
+
+void IntEqClasses::grow(unsigned N) {
+ assert(NumClasses == 0 && "grow() called after compress().");
+ EC.reserve(N);
+ while (EC.size() < N)
+ EC.push_back(EC.size());
+}
+
+void IntEqClasses::join(unsigned a, unsigned b) {
+ assert(NumClasses == 0 && "join() called after compress().");
+ unsigned eca = EC[a];
+ unsigned ecb = EC[b];
+ // Update pointers while searching for the leaders, compressing the paths
+ // incrementally. The larger leader will eventually be updated, joining the
+ // classes.
+ while (eca != ecb)
+ if (eca < ecb)
+ EC[b] = eca, b = ecb, ecb = EC[b];
+ else
+ EC[a] = ecb, a = eca, eca = EC[a];
+}
+
+unsigned IntEqClasses::findLeader(unsigned a) const {
+ assert(NumClasses == 0 && "findLeader() called after compress().");
+ while (a != EC[a])
+ a = EC[a];
+ return a;
+}
+
+void IntEqClasses::compress() {
+ if (NumClasses)
+ return;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]];
+}
+
+void IntEqClasses::uncompress() {
+ if (!NumClasses)
+ return;
+ SmallVector<unsigned, 8> Leader;
+ for (unsigned i = 0, e = EC.size(); i != e; ++i)
+ if (EC[i] < Leader.size())
+ EC[i] = Leader[EC[i]];
+ else
+ Leader.push_back(EC[i] = i);
+ NumClasses = 0;
+}
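
A short usage sketch of the new IntEqClasses helper, following the grow/join/compress protocol described in the file header above (values are illustrative):

  #include "llvm/ADT/IntEqClasses.h"
  #include <cassert>
  using llvm::IntEqClasses;

  void demo() {
    IntEqClasses EC;
    EC.grow(5);          // integers 0..4, each in its own class
    EC.join(0, 3);       // {0,3} {1} {2} {4}
    EC.join(1, 4);       // {0,3} {1,4} {2}
    assert(EC.findLeader(3) == EC.findLeader(0));
    EC.compress();       // renumber the classes 0..getNumClasses()-1
    assert(EC.getNumClasses() == 3);
    assert(EC[0] == EC[3] && EC[1] == EC[4] && EC[0] != EC[2]);
  }
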
diff --git a/contrib/llvm/lib/Support/IntervalMap.cpp b/contrib/llvm/lib/Support/IntervalMap.cpp
new file mode 100644
index 0000000..4dfcc40
--- /dev/null
+++ b/contrib/llvm/lib/Support/IntervalMap.cpp
@@ -0,0 +1,161 @@
+//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the few non-templated functions in IntervalMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntervalMap.h"
+
+namespace llvm {
+namespace IntervalMapImpl {
+
+void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) {
+ assert(!path.empty() && "Can't replace missing root");
+ path.front() = Entry(Root, Size, Offsets.first);
+ path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second));
+}
+
+NodeRef Path::getLeftSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go left.
+ unsigned l = Level - 1;
+ while (l && path[l].offset == 0)
+ --l;
+
+ // We can't go left.
+ if (path[l].offset == 0)
+ return NodeRef();
+
+ // NR is the subtree containing our left sibling.
+ NodeRef NR = path[l].subtree(path[l].offset - 1);
+
+ // Keep right all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(NR.size() - 1);
+ return NR;
+}
+
+void Path::moveLeft(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go left.
+ unsigned l = 0;
+ if (valid()) {
+ l = Level - 1;
+ while (path[l].offset == 0) {
+ assert(l != 0 && "Cannot move beyond begin()");
+ --l;
+ }
+ } else if (height() < Level)
+ // end() may have created a height=0 path.
+ path.resize(Level + 1, Entry(0, 0, 0));
+
+ // NR is the subtree containing our left sibling.
+ --path[l].offset;
+ NodeRef NR = subtree(l);
+
+ // Get the rightmost node in the subtree.
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, NR.size() - 1);
+ NR = NR.subtree(NR.size() - 1);
+ }
+ path[l] = Entry(NR, NR.size() - 1);
+}
+
+NodeRef Path::getRightSibling(unsigned Level) const {
+ // The root has no siblings.
+ if (Level == 0)
+ return NodeRef();
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // We can't go right.
+ if (atLastEntry(l))
+ return NodeRef();
+
+ // NR is the subtree containing our right sibling.
+ NodeRef NR = path[l].subtree(path[l].offset + 1);
+
+ // Keep left all the way down.
+ for (++l; l != Level; ++l)
+ NR = NR.subtree(0);
+ return NR;
+}
+
+void Path::moveRight(unsigned Level) {
+ assert(Level != 0 && "Cannot move the root node");
+
+ // Go up the tree until we can go right.
+ unsigned l = Level - 1;
+ while (l && atLastEntry(l))
+ --l;
+
+ // NR is the subtree containing our right sibling. If we hit end(), we have
+ // offset(0) == node(0).size().
+ if (++path[l].offset == path[l].size)
+ return;
+ NodeRef NR = subtree(l);
+
+ for (++l; l != Level; ++l) {
+ path[l] = Entry(NR, 0);
+ NR = NR.subtree(0);
+ }
+ path[l] = Entry(NR, 0);
+}
+
+
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+ const unsigned *CurSize, unsigned NewSize[],
+ unsigned Position, bool Grow) {
+ assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements");
+ assert(Position <= Elements && "Invalid position");
+ if (!Nodes)
+ return IdxPair();
+
+ // Trivial algorithm: left-leaning even distribution.
+ const unsigned PerNode = (Elements + Grow) / Nodes;
+ const unsigned Extra = (Elements + Grow) % Nodes;
+ IdxPair PosPair = IdxPair(Nodes, 0);
+ unsigned Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ Sum += NewSize[n] = PerNode + (n < Extra);
+ if (PosPair.first == Nodes && Sum > Position)
+ PosPair = IdxPair(n, Position - (Sum - NewSize[n]));
+ }
+ assert(Sum == Elements + Grow && "Bad distribution sum");
+
+ // Subtract the Grow element that was added.
+ if (Grow) {
+ assert(PosPair.first < Nodes && "Bad algebra");
+ assert(NewSize[PosPair.first] && "Too few elements to need Grow");
+ --NewSize[PosPair.first];
+ }
+
+#ifndef NDEBUG
+ Sum = 0;
+ for (unsigned n = 0; n != Nodes; ++n) {
+ assert(NewSize[n] <= Capacity && "Overallocated node");
+ Sum += NewSize[n];
+ }
+ assert(Sum == Elements && "Bad distribution sum");
+#endif
+
+ return PosPair;
+}
+
+} // namespace IntervalMapImpl
+} // namespace llvm
+
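
distribute() above spreads Elements (plus, when Grow is set, one new element at Position) across Nodes as evenly as possible, leaning left. A worked example for 2 nodes of capacity 4 holding 5 elements with an insertion at position 2, assuming the matching declaration in llvm/ADT/IntervalMap.h (CurSize is unused by this trivial algorithm; the values are illustrative):

  #include "llvm/ADT/IntervalMap.h"
  #include <cassert>
  using namespace llvm::IntervalMapImpl;

  void demo() {
    // (5 + 1) / 2 = 3 per node with no remainder, then the Grow element is
    // subtracted again from the node that receives it.
    unsigned CurSize[2] = {4, 1};
    unsigned NewSize[2];
    IdxPair Pos = distribute(/*Nodes=*/2, /*Elements=*/5, /*Capacity=*/4,
                             CurSize, NewSize, /*Position=*/2, /*Grow=*/true);
    assert(NewSize[0] == 2 && NewSize[1] == 3);  // sums back to the original 5
    assert(Pos.first == 0 && Pos.second == 2);   // new element lands in node 0, slot 2
  }
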
diff --git a/contrib/llvm/lib/Support/ManagedStatic.cpp b/contrib/llvm/lib/Support/ManagedStatic.cpp
index 4e655a0..c767c15 100644
--- a/contrib/llvm/lib/Support/ManagedStatic.cpp
+++ b/contrib/llvm/lib/Support/ManagedStatic.cpp
@@ -13,7 +13,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
-#include "llvm/System/Atomic.h"
+#include "llvm/Support/Atomic.h"
#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm/lib/System/Memory.cpp b/contrib/llvm/lib/Support/Memory.cpp
index 49ccf3d..ac7af0a 100644
--- a/contrib/llvm/lib/System/Memory.cpp
+++ b/contrib/llvm/lib/Support/Memory.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Memory.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
namespace llvm {
@@ -25,7 +25,7 @@ using namespace sys;
#include "Unix/Memory.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/Memory.inc"
+#include "Windows/Memory.inc"
#endif
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
@@ -35,7 +35,7 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
/// platforms.
void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
size_t Len) {
-
+
// icache invalidation for PPC and ARM.
#if defined(__APPLE__)
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
index 542162d..a0c650d 100644
--- a/contrib/llvm/lib/Support/MemoryBuffer.cpp
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -15,14 +15,16 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/System/Errno.h"
-#include "llvm/System/Path.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <cerrno>
+#include <new>
#include <sys/types.h>
#include <sys/stat.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -34,6 +36,8 @@
#include <fcntl.h>
using namespace llvm;
+namespace { const llvm::error_code success; }
+
//===----------------------------------------------------------------------===//
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//
@@ -142,22 +146,20 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
/// if the Filename is "-". If an error occurs, this returns null and fills
/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
/// returns an empty buffer.
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
- std::string *ErrStr,
- int64_t FileSize,
- struct stat *FileInfo) {
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
if (Filename == "-")
- return getSTDIN(ErrStr);
- return getFile(Filename, ErrStr, FileSize, FileInfo);
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
}
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
- std::string *ErrStr,
- int64_t FileSize,
- struct stat *FileInfo) {
+error_code MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
if (strcmp(Filename, "-") == 0)
- return getSTDIN(ErrStr);
- return getFile(Filename, ErrStr, FileSize, FileInfo);
+ return getSTDIN(result);
+ return getFile(Filename, result, FileSize);
}
//===----------------------------------------------------------------------===//
@@ -177,50 +179,47 @@ public:
sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
}
};
-
-/// FileCloser - RAII object to make sure an FD gets closed properly.
-class FileCloser {
- int FD;
-public:
- explicit FileCloser(int FD) : FD(FD) {}
- ~FileCloser() { ::close(FD); }
-};
}
-MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
- int64_t FileSize, struct stat *FileInfo) {
+error_code MemoryBuffer::getFile(StringRef Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
+ // Ensure the path is null terminated.
SmallString<256> PathBuf(Filename.begin(), Filename.end());
- return MemoryBuffer::getFile(PathBuf.c_str(), ErrStr, FileSize, FileInfo);
+ return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize);
}
-MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
- int64_t FileSize, struct stat *FileInfo) {
+error_code MemoryBuffer::getFile(const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
int OpenFlags = O_RDONLY;
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
int FD = ::open(Filename, OpenFlags);
if (FD == -1) {
- if (ErrStr) *ErrStr = sys::StrError();
- return 0;
+ return error_code(errno, posix_category());
}
- FileCloser FC(FD); // Close FD on return.
-
+ error_code ret = getOpenFile(FD, Filename, result, FileSize);
+ close(FD);
+ return ret;
+}
+
+error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
+ OwningPtr<MemoryBuffer> &result,
+ int64_t FileSize) {
// If we don't know the file size, use fstat to find out. fstat on an open
// file descriptor is cheaper than stat on a random path.
- if (FileSize == -1 || FileInfo) {
- struct stat MyFileInfo;
- struct stat *FileInfoPtr = FileInfo? FileInfo : &MyFileInfo;
-
+ if (FileSize == -1) {
+ struct stat FileInfo;
// TODO: This should use fstat64 when available.
- if (fstat(FD, FileInfoPtr) == -1) {
- if (ErrStr) *ErrStr = sys::StrError();
- return 0;
+ if (fstat(FD, &FileInfo) == -1) {
+ return error_code(errno, posix_category());
}
- FileSize = FileInfoPtr->st_size;
+ FileSize = FileInfo.st_size;
}
-
-
+
+
// If the file is large, try to use mmap to read it in. We don't use mmap
// for small files, because this can severely fragment our address space. Also
// don't try to map files that are exactly a multiple of the system page size,
@@ -230,16 +229,17 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
if (FileSize >= 4096*4 &&
(FileSize & (sys::Process::GetPageSize()-1)) != 0) {
if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
- return GetNamedBuffer<MemoryBufferMMapFile>(StringRef(Pages, FileSize),
- Filename);
+ result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
+ StringRef(Pages, FileSize), Filename));
+ return success;
}
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename);
if (!Buf) {
- // Failed to create a buffer.
- if (ErrStr) *ErrStr = "could not allocate buffer";
- return 0;
+ // Failed to create a buffer. The only way it can fail is if
+ // new(std::nothrow) returns 0.
+ return make_error_code(errc::not_enough_memory);
}
OwningPtr<MemoryBuffer> SB(Buf);
@@ -252,26 +252,27 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
if (errno == EINTR)
continue;
// Error while reading.
- if (ErrStr) *ErrStr = sys::StrError();
- return 0;
+ return error_code(errno, posix_category());
} else if (NumRead == 0) {
// We hit EOF early, truncate and terminate buffer.
Buf->BufferEnd = BufPtr;
*BufPtr = 0;
- return SB.take();
+ result.swap(SB);
+ return success;
}
BytesLeft -= NumRead;
BufPtr += NumRead;
}
- return SB.take();
+ result.swap(SB);
+ return success;
}
//===----------------------------------------------------------------------===//
// MemoryBuffer::getSTDIN implementation.
//===----------------------------------------------------------------------===//
-MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) {
+error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
// Read in all of the data from stdin, we cannot mmap stdin.
//
// FIXME: That isn't necessarily true, we should try to mmap stdin and
@@ -287,11 +288,11 @@ MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) {
ReadBytes = read(0, Buffer.end(), ChunkSize);
if (ReadBytes == -1) {
if (errno == EINTR) continue;
- if (ErrStr) *ErrStr = sys::StrError();
- return 0;
+ return error_code(errno, posix_category());
}
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
- return getMemBufferCopy(Buffer, "<stdin>");
+ result.reset(getMemBufferCopy(Buffer, "<stdin>"));
+ return success;
}
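
The getFile/getSTDIN overloads above replace the old null-pointer-plus-ErrStr convention with an error_code result and an OwningPtr out-parameter, matching the callers updated in CommandLine.cpp and FileUtilities.cpp earlier in this diff. A minimal sketch of the new calling convention (the file name is illustrative):

  #include "llvm/ADT/OwningPtr.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Support/system_error.h"
  using namespace llvm;

  bool dumpFile(const char *Path) {
    OwningPtr<MemoryBuffer> Buf;
    if (error_code ec = MemoryBuffer::getFile(Path, Buf)) {
      errs() << Path << ": " << ec.message() << "\n";
      return false;
    }
    outs() << Buf->getBuffer();
    return true;
  }
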
diff --git a/contrib/llvm/lib/System/Mutex.cpp b/contrib/llvm/lib/Support/Mutex.cpp
index 8ccd6e5..b408973 100644
--- a/contrib/llvm/lib/System/Mutex.cpp
+++ b/contrib/llvm/lib/Support/Mutex.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -78,6 +78,7 @@ MutexImpl::MutexImpl( bool recursive)
#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
// Make it a process local mutex
errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
#endif
// Initialize the mutex
@@ -149,9 +150,8 @@ MutexImpl::tryacquire()
#elif defined(LLVM_ON_UNIX)
#include "Unix/Mutex.inc"
#elif defined( LLVM_ON_WIN32)
-#include "Win32/Mutex.inc"
+#include "Windows/Mutex.inc"
#else
#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
#endif
#endif
-
diff --git a/contrib/llvm/lib/System/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index 4445c66..e5e875b 100644
--- a/contrib/llvm/lib/System/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/FileSystem.h"
#include <cassert>
#include <cstring>
#include <ostream>
@@ -104,7 +106,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 2: return Mach_O_Executable_FileType;
case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
case 4: return Mach_O_Core_FileType;
- case 5: return Mach_O_PreloadExectuable_FileType;
+ case 5: return Mach_O_PreloadExecutable_FileType;
case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType;
case 7: return Mach_O_DynamicLinker_FileType;
case 8: return Mach_O_Bundle_FileType;
@@ -127,6 +129,10 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
if (magic[1] == 0x02)
return COFF_FileType;
break;
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return COFF_FileType;
+ break;
default:
break;
@@ -136,24 +142,33 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- return hasMagicNumber("!<arch>\012");
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ return type == Archive_FileType;
}
bool
Path::isDynamicLibrary() const {
- std::string Magic;
- if (getMagicNumber(Magic, 64))
- switch (IdentifyFileType(Magic.c_str(),
- static_cast<unsigned>(Magic.length()))) {
- default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
- }
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
+ return false;
+ switch (type) {
+ default: return false;
+ case Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case ELF_SharedObject_FileType:
+ case COFF_FileType: return true;
+ }
+}
- return false;
+bool
+Path::isObjectFile() const {
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ return false;
+ return true;
}
Path
@@ -174,18 +189,23 @@ Path::FindLibrary(std::string& name) {
}
StringRef Path::GetDLLSuffix() {
- return LTDL_SHLIB_EXT;
+ return &(LTDL_SHLIB_EXT[1]);
+}
+
+void
+Path::appendSuffix(StringRef suffix) {
+ if (!suffix.empty()) {
+ path.append(".");
+ path.append(suffix);
+ }
}
bool
Path::isBitcodeFile() const {
- std::string actualMagic;
- if (!getMagicNumber(actualMagic, 4))
+ LLVMFileType type;
+ if (fs::identify_magic(str(), type))
return false;
- LLVMFileType FT =
- IdentifyFileType(actualMagic.c_str(),
- static_cast<unsigned>(actualMagic.length()));
- return FT == Bitcode_FileType;
+ return type == Bitcode_FileType;
}
bool Path::hasMagicNumber(StringRef Magic) const {
@@ -259,6 +279,5 @@ static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
#include "Unix/Path.inc"
#endif
#if defined(LLVM_ON_WIN32)
-#include "Win32/Path.inc"
+#include "Windows/Path.inc"
#endif
-
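Note on the Path.cpp rework above: the file-type predicates now all delegate to fs::identify_magic, which reads a file's leading bytes and maps them to an LLVMFileType. A minimal caller sketch, using only names visible in this hunk (the helper function itself is hypothetical):

// Sketch only; assumes the fs::identify_magic(path, result) overload and the
// LLVMFileType values referenced in the hunks above.
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

static bool isSharedObject(const llvm::sys::Path &P) {
  llvm::sys::LLVMFileType Type;
  if (llvm::sys::fs::identify_magic(P.str(), Type))
    return false;                              // nonzero error_code: treat as "no"
  return Type == llvm::sys::ELF_SharedObject_FileType;
}
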
diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp
new file mode 100644
index 0000000..896c94c
--- /dev/null
+++ b/contrib/llvm/lib/Support/PathV2.cpp
@@ -0,0 +1,774 @@
+//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
+#include <cstdio>
+#include <cstring>
+
+namespace {
+ using llvm::StringRef;
+ using llvm::sys::path::is_separator;
+
+#ifdef LLVM_ON_WIN32
+ const StringRef separators = "\\/";
+ const char prefered_separator = '\\';
+#else
+ const StringRef separators = "/";
+ const char prefered_separator = '/';
+#endif
+
+ const llvm::error_code success;
+
+ StringRef find_first_component(StringRef path) {
+ // Look for this first component in the following order.
+ // * empty (in this case we return an empty string)
+ // * either C: or {//,\\}net.
+ // * {/,\}
+ // * {.,..}
+ // * {file,directory}name
+
+ if (path.empty())
+ return path;
+
+#ifdef LLVM_ON_WIN32
+ // C:
+ if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':')
+ return path.substr(0, 2);
+#endif
+
+ // //net
+ if ((path.size() > 2) &&
+ is_separator(path[0]) &&
+ path[0] == path[1] &&
+ !is_separator(path[2])) {
+ // Find the next directory separator.
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ // {/,\}
+ if (is_separator(path[0]))
+ return path.substr(0, 1);
+
+ if (path.startswith(".."))
+ return path.substr(0, 2);
+
+ if (path[0] == '.')
+ return path.substr(0, 1);
+
+ // * {file,directory}name
+ size_t end = path.find_first_of(separators, 2);
+ return path.substr(0, end);
+ }
+
+ size_t filename_pos(StringRef str) {
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return 0;
+
+ if (str.size() > 0 && is_separator(str[str.size() - 1]))
+ return str.size() - 1;
+
+ size_t pos = str.find_last_of(separators, str.size() - 1);
+
+#ifdef LLVM_ON_WIN32
+ if (pos == StringRef::npos)
+ pos = str.find_last_of(':', str.size() - 2);
+#endif
+
+ if (pos == StringRef::npos ||
+ (pos == 1 && is_separator(str[0])))
+ return 0;
+
+ return pos + 1;
+ }
+
+ size_t root_dir_start(StringRef str) {
+ // case "c:/"
+#ifdef LLVM_ON_WIN32
+ if (str.size() > 2 &&
+ str[1] == ':' &&
+ is_separator(str[2]))
+ return 2;
+#endif
+
+ // case "//"
+ if (str.size() == 2 &&
+ is_separator(str[0]) &&
+ str[0] == str[1])
+ return StringRef::npos;
+
+ // case "//net"
+ if (str.size() > 3 &&
+ is_separator(str[0]) &&
+ str[0] == str[1] &&
+ !is_separator(str[2])) {
+ return str.find_first_of(separators, 2);
+ }
+
+ // case "/"
+ if (str.size() > 0 && is_separator(str[0]))
+ return 0;
+
+ return StringRef::npos;
+ }
+
+ size_t parent_path_end(StringRef path) {
+ size_t end_pos = filename_pos(path);
+
+ bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+
+ // Skip separators except for root dir.
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(path[end_pos - 1]))
+ --end_pos;
+
+ if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
+ return StringRef::npos;
+
+ return end_pos;
+ }
+} // end unnamed namespace
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+const_iterator begin(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Component = find_first_component(path);
+ i.Position = 0;
+ return i;
+}
+
+const_iterator end(StringRef path) {
+ const_iterator i;
+ i.Path = path;
+ i.Position = path.size();
+ return i;
+}
+
+const_iterator &const_iterator::operator++() {
+ assert(Position < Path.size() && "Tried to increment past end!");
+
+ // Increment Position to past the current component
+ Position += Component.size();
+
+ // Check for end.
+ if (Position == Path.size()) {
+ Component = StringRef();
+ return *this;
+ }
+
+ // Both POSIX and Windows treat paths that begin with exactly two separators
+ // specially.
+ bool was_net = Component.size() > 2 &&
+ is_separator(Component[0]) &&
+ Component[1] == Component[0] &&
+ !is_separator(Component[2]);
+
+ // Handle separators.
+ if (is_separator(Path[Position])) {
+ // Root dir.
+ if (was_net
+#ifdef LLVM_ON_WIN32
+ // c:/
+ || Component.endswith(":")
+#endif
+ ) {
+ Component = Path.substr(Position, 1);
+ return *this;
+ }
+
+ // Skip extra separators.
+ while (Position != Path.size() &&
+ is_separator(Path[Position])) {
+ ++Position;
+ }
+
+ // Treat trailing '/' as a '.'.
+ if (Position == Path.size()) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+ }
+
+ // Find next component.
+ size_t end_pos = Path.find_first_of(separators, Position);
+ Component = Path.slice(Position, end_pos);
+
+ return *this;
+}
+
+const_iterator &const_iterator::operator--() {
+ // If we're at the end and the previous char was a '/', return '.'.
+ if (Position == Path.size() &&
+ Path.size() > 1 &&
+ is_separator(Path[Position - 1])
+#ifdef LLVM_ON_WIN32
+ && Path[Position - 2] != ':'
+#endif
+ ) {
+ --Position;
+ Component = ".";
+ return *this;
+ }
+
+ // Skip separators unless it's the root directory.
+ size_t root_dir_pos = root_dir_start(Path);
+ size_t end_pos = Position;
+
+ while(end_pos > 0 &&
+ (end_pos - 1) != root_dir_pos &&
+ is_separator(Path[end_pos - 1]))
+ --end_pos;
+
+ // Find next separator.
+ size_t start_pos = filename_pos(Path.substr(0, end_pos));
+ Component = Path.slice(start_pos, end_pos);
+ Position = start_pos;
+ return *this;
+}
+
+bool const_iterator::operator==(const const_iterator &RHS) const {
+ return Path.begin() == RHS.Path.begin() &&
+ Position == RHS.Position;
+}
+
+bool const_iterator::operator!=(const const_iterator &RHS) const {
+ return !(*this == RHS);
+}
+
+ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
+ return Position - RHS.Position;
+}
+
+const StringRef root_path(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ if ((++pos != e) && is_separator((*pos)[0])) {
+ // {C:/,//net/}, so get the first two components.
+ return path.substr(0, b->size() + pos->size());
+ } else {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // POSIX style root directory.
+ if (is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ return StringRef();
+}
+
+const StringRef root_name(StringRef path) {
+ const_iterator b = begin(path),
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if (has_net || has_drive) {
+ // just {C:,//net}, return the first component.
+ return *b;
+ }
+ }
+
+ // No path or no name.
+ return StringRef();
+}
+
+const StringRef root_directory(StringRef path) {
+ const_iterator b = begin(path),
+ pos = b,
+ e = end(path);
+ if (b != e) {
+ bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
+ bool has_drive =
+#ifdef LLVM_ON_WIN32
+ b->endswith(":");
+#else
+ false;
+#endif
+
+ if ((has_net || has_drive) &&
+ // {C:,//net}, skip to the next component.
+ (++pos != e) && is_separator((*pos)[0])) {
+ return *pos;
+ }
+
+ // POSIX style root directory.
+ if (!has_net && is_separator((*b)[0])) {
+ return *b;
+ }
+ }
+
+ // No path or no root.
+ return StringRef();
+}
+
+const StringRef relative_path(StringRef path) {
+ StringRef root = root_path(path);
+ return root.substr(root.size());
+}
+
+void append(SmallVectorImpl<char> &path, const Twine &a,
+ const Twine &b,
+ const Twine &c,
+ const Twine &d) {
+ SmallString<32> a_storage;
+ SmallString<32> b_storage;
+ SmallString<32> c_storage;
+ SmallString<32> d_storage;
+
+ SmallVector<StringRef, 4> components;
+ if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage));
+ if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage));
+ if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage));
+ if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
+
+ for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(),
+ e = components.end();
+ i != e; ++i) {
+ bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
+ bool component_has_sep = !i->empty() && is_separator((*i)[0]);
+ bool is_root_name = has_root_name(*i);
+
+ if (path_has_sep) {
+ // Strip separators from beginning of component.
+ size_t loc = i->find_first_not_of(separators);
+ StringRef c = i->substr(loc);
+
+ // Append it.
+ path.append(c.begin(), c.end());
+ continue;
+ }
+
+ if (!component_has_sep && !(path.empty() || is_root_name)) {
+ // Add a separator.
+ path.push_back(prefered_separator);
+ }
+
+ path.append(i->begin(), i->end());
+ }
+}
+
+void append(SmallVectorImpl<char> &path,
+ const_iterator begin, const_iterator end) {
+ for (; begin != end; ++begin)
+ path::append(path, *begin);
+}
+
+const StringRef parent_path(StringRef path) {
+ size_t end_pos = parent_path_end(path);
+ if (end_pos == StringRef::npos)
+ return StringRef();
+ else
+ return path.substr(0, end_pos);
+}
+
+void remove_filename(SmallVectorImpl<char> &path) {
+ size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+ if (end_pos != StringRef::npos)
+ path.set_size(end_pos);
+}
+
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+ StringRef p(path.begin(), path.size());
+ SmallString<32> ext_storage;
+ StringRef ext = extension.toStringRef(ext_storage);
+
+ // Erase existing extension.
+ size_t pos = p.find_last_of('.');
+ if (pos != StringRef::npos && pos >= filename_pos(p))
+ path.set_size(pos);
+
+ // Append '.' if needed.
+ if (ext.size() > 0 && ext[0] != '.')
+ path.push_back('.');
+
+ // Append extension.
+ path.append(ext.begin(), ext.end());
+}
+
+void native(const Twine &path, SmallVectorImpl<char> &result) {
+ // Clear result.
+ result.clear();
+#ifdef LLVM_ON_WIN32
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+ result.reserve(p.size());
+ for (StringRef::const_iterator i = p.begin(),
+ e = p.end();
+ i != e;
+ ++i) {
+ if (*i == '/')
+ result.push_back('\\');
+ else
+ result.push_back(*i);
+ }
+#else
+ path.toVector(result);
+#endif
+}
+
+const StringRef filename(StringRef path) {
+ return *(--end(path));
+}
+
+const StringRef stem(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return fname;
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return fname;
+ else
+ return fname.substr(0, pos);
+}
+
+const StringRef extension(StringRef path) {
+ StringRef fname = filename(path);
+ size_t pos = fname.find_last_of('.');
+ if (pos == StringRef::npos)
+ return StringRef();
+ else
+ if ((fname.size() == 1 && fname == ".") ||
+ (fname.size() == 2 && fname == ".."))
+ return StringRef();
+ else
+ return fname.substr(pos);
+}
+
+bool is_separator(char value) {
+ switch(value) {
+#ifdef LLVM_ON_WIN32
+ case '\\': // fall through
+#endif
+ case '/': return true;
+ default: return false;
+ }
+}
+
+bool has_root_name(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_name(p).empty();
+}
+
+bool has_root_directory(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_directory(p).empty();
+}
+
+bool has_root_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !root_path(p).empty();
+}
+
+bool has_relative_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !relative_path(p).empty();
+}
+
+bool has_filename(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !filename(p).empty();
+}
+
+bool has_parent_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !parent_path(p).empty();
+}
+
+bool has_stem(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !stem(p).empty();
+}
+
+bool has_extension(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ return !extension(p).empty();
+}
+
+bool is_absolute(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ bool rootDir = has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = has_root_name(p);
+#else
+ rootName = true;
+#endif
+
+ return rootDir && rootName;
+}
+
+bool is_relative(const Twine &path) {
+ return !is_absolute(path);
+}
+
+} // end namespace path
+
+namespace fs {
+
+error_code make_absolute(SmallVectorImpl<char> &path) {
+ StringRef p(path.data(), path.size());
+
+ bool rootName = path::has_root_name(p),
+ rootDirectory = path::has_root_directory(p);
+
+ // Already absolute.
+ if (rootName && rootDirectory)
+ return success;
+
+ // All of the following conditions will need the current directory.
+ SmallString<128> current_dir;
+ if (error_code ec = current_path(current_dir)) return ec;
+
+ // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) {
+ // Append path to the current directory.
+ path::append(current_dir, p);
+ // Set path to the result.
+ path.swap(current_dir);
+ return success;
+ }
+
+ if (!rootName && rootDirectory) {
+ StringRef cdrn = path::root_name(current_dir);
+ SmallString<128> curDirRootName(cdrn.begin(), cdrn.end());
+ path::append(curDirRootName, p);
+ // Set path to the result.
+ path.swap(curDirRootName);
+ return success;
+ }
+
+ if (rootName && !rootDirectory) {
+ StringRef pRootName = path::root_name(p);
+ StringRef bRootDirectory = path::root_directory(current_dir);
+ StringRef bRelativePath = path::relative_path(current_dir);
+ StringRef pRelativePath = path::relative_path(p);
+
+ SmallString<128> res;
+ path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
+ path.swap(res);
+ return success;
+ }
+
+ llvm_unreachable("All rootName and rootDirectory combinations should have "
+ "occurred above!");
+}
+
+error_code create_directories(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ StringRef parent = path::parent_path(p);
+ bool parent_exists;
+
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
+
+ if (!parent_exists)
+ return create_directories(parent, existed);
+
+ return create_directory(p, existed);
+}
+
+bool exists(file_status status) {
+ return status_known(status) && status.type() != file_type::file_not_found;
+}
+
+bool status_known(file_status s) {
+ return s.type() != file_type::status_error;
+}
+
+bool is_directory(file_status status) {
+ return status.type() == file_type::directory_file;
+}
+
+error_code is_directory(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_directory(st);
+ return success;
+}
+
+bool is_regular_file(file_status status) {
+ return status.type() == file_type::regular_file;
+}
+
+error_code is_regular_file(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_regular_file(st);
+ return success;
+}
+
+bool is_symlink(file_status status) {
+ return status.type() == file_type::symlink_file;
+}
+
+error_code is_symlink(const Twine &path, bool &result) {
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+ result = is_symlink(st);
+ return success;
+}
+
+bool is_other(file_status status) {
+ return exists(status) &&
+ !is_regular_file(status) &&
+ !is_directory(status) &&
+ !is_symlink(status);
+}
+
+void directory_entry::replace_filename(const Twine &filename, file_status st,
+ file_status symlink_st) {
+ SmallString<128> path(Path.begin(), Path.end());
+ path::remove_filename(path);
+ path::append(path, filename);
+ Path = path.str();
+ Status = st;
+ SymlinkStatus = symlink_st;
+}
+
+error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
+ SmallString<32> MagicStorage;
+ StringRef Magic = magic.toStringRef(MagicStorage);
+ SmallString<32> Buffer;
+
+ if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
+ if (ec == errc::value_too_large) {
+ // Magic.size() > file_size(Path).
+ result = false;
+ return success;
+ }
+ return ec;
+ }
+
+ result = Magic == Buffer;
+ return success;
+}
+
+error_code identify_magic(const Twine &path, LLVMFileType &result) {
+ SmallString<32> Magic;
+ error_code ec = get_magic(path, Magic.capacity(), Magic);
+ if (ec && ec != errc::value_too_large)
+ return ec;
+
+ result = IdentifyFileType(Magic.data(), Magic.size());
+ return success;
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+ if (ft == file_type::directory_file) {
+ // This code would be a lot better with exceptions ;/.
+ error_code ec;
+ for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ if (ec) return ec;
+ file_status st;
+ if (error_code ec = i->status(st)) return ec;
+ if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+ }
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count; // Include the directory itself in the items removed.
+ } else {
+ bool obviously_this_exists;
+ if (error_code ec = remove(path, obviously_this_exists)) return ec;
+ assert(obviously_this_exists);
+ ++count;
+ }
+
+ return success;
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toStringRef(path_storage);
+
+ file_status fs;
+ if (error_code ec = status(path, fs))
+ return ec;
+ num_removed = 0;
+ return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+ return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
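The new PathV2 file is purely lexical: begin()/end() iterate components, root_path/filename/stem/extension slice them, and append/replace_extension edit an in-place SmallString buffer. A short sketch of how a caller strings these together, assuming only the llvm::sys::path functions defined above plus the defaulted Twine arguments their header presumably declares:

// Sketch of the lexical path API introduced above; not taken from the diff.
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"

void demoPathV2() {
  using namespace llvm;
  using namespace llvm::sys;

  StringRef P = "/tmp/out/mod.ll";
  outs() << "root: "      << path::root_path(P)   // "/"
         << " filename: " << path::filename(P)    // "mod.ll"
         << " stem: "     << path::stem(P)        // "mod"
         << " ext: "      << path::extension(P)   // ".ll"
         << "\n";

  SmallString<128> Buf;
  path::append(Buf, "/tmp", "out", "mod.ll");     // "/tmp/out/mod.ll"
  path::replace_extension(Buf, "bc");             // "/tmp/out/mod.bc"
  outs() << Buf.str() << "\n";
}
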
diff --git a/contrib/llvm/lib/Support/PluginLoader.cpp b/contrib/llvm/lib/Support/PluginLoader.cpp
index 36caecf..2924cfa 100644
--- a/contrib/llvm/lib/Support/PluginLoader.cpp
+++ b/contrib/llvm/lib/Support/PluginLoader.cpp
@@ -15,8 +15,8 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/DynamicLibrary.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
#include <vector>
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
index 3c8a108..a9f4709 100644
--- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -15,8 +15,8 @@
#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Signals.h"
-#include "llvm/System/ThreadLocal.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
#include "llvm/ADT/SmallString.h"
#ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -55,7 +55,7 @@ static void PrintCurStackTrace(raw_ostream &OS) {
}
// Integrate with crash reporter libraries.
-#if defined (__APPLE__) && defined (HAVE_CRASHREPORTERCLIENT_H)
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
// If any clients of llvm try to link to libCrashReporterClient.a themselves,
// only one crash info struct will be used.
extern "C" {
@@ -64,7 +64,7 @@ struct crashreporter_annotations_t gCRAnnotations
__attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
}
-#elif defined (__APPLE__)
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
static const char *__crashreporter_info__ = 0;
asm(".desc ___crashreporter_info__, 0x10");
#endif
@@ -86,11 +86,11 @@ static void CrashHandler(void *) {
}
if (!TmpStr.empty()) {
-#ifndef HAVE_CRASHREPORTERCLIENT_H
- __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
-#else
+#ifdef HAVE_CRASHREPORTERCLIENT_H
// Cast to void to avoid warning.
(void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#elif HAVE_CRASHREPORTER_INFO
+ __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
#endif
errs() << TmpStr.str();
}
@@ -107,7 +107,7 @@ static bool RegisterCrashPrinter() {
PrettyStackTraceEntry::PrettyStackTraceEntry() {
// The first time this is called, we register the crash printer.
static bool HandlerRegistered = RegisterCrashPrinter();
- HandlerRegistered = HandlerRegistered;
+ (void)HandlerRegistered;
// Link ourselves.
NextEntry = PrettyStackTraceHead.get();
@@ -131,4 +131,3 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << ArgV[i] << ' ';
OS << '\n';
}
-
diff --git a/contrib/llvm/lib/System/Process.cpp b/contrib/llvm/lib/Support/Process.cpp
index e93b2af..88ca7c3 100644
--- a/contrib/llvm/lib/System/Process.cpp
+++ b/contrib/llvm/lib/Support/Process.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Process.h"
+#include "llvm/Support/Process.h"
#include "llvm/Config/config.h"
namespace llvm {
@@ -29,5 +29,5 @@ using namespace sys;
#include "Unix/Process.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/Process.inc"
+#include "Windows/Process.inc"
#endif
diff --git a/contrib/llvm/lib/System/Program.cpp b/contrib/llvm/lib/Support/Program.cpp
index cd58c2c..01860b0 100644
--- a/contrib/llvm/lib/System/Program.cpp
+++ b/contrib/llvm/lib/Support/Program.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Program.h"
+#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
using namespace sys;
@@ -31,7 +31,7 @@ Program::ExecuteAndWait(const Path& path,
std::string* ErrMsg) {
Program prg;
if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg))
- return prg.Wait(secondsToWait, ErrMsg);
+ return prg.Wait(path, secondsToWait, ErrMsg);
else
return -1;
}
@@ -52,5 +52,5 @@ Program::ExecuteNoWait(const Path& path,
#include "Unix/Program.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/Program.inc"
+#include "Windows/Program.inc"
#endif
diff --git a/contrib/llvm/lib/System/RWMutex.cpp b/contrib/llvm/lib/Support/RWMutex.cpp
index deb0470..fc02f9c 100644
--- a/contrib/llvm/lib/System/RWMutex.cpp
+++ b/contrib/llvm/lib/Support/RWMutex.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "llvm/System/RWMutex.h"
+#include "llvm/Support/RWMutex.h"
#include <cstring>
//===----------------------------------------------------------------------===//
@@ -150,7 +150,7 @@ RWMutexImpl::writer_release()
#elif defined(LLVM_ON_UNIX)
#include "Unix/RWMutex.inc"
#elif defined( LLVM_ON_WIN32)
-#include "Win32/RWMutex.inc"
+#include "Windows/RWMutex.inc"
#else
#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp
#endif
diff --git a/contrib/llvm/lib/System/SearchForAddressOfSpecialSymbol.cpp b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
index 73b484c..d638301 100644
--- a/contrib/llvm/lib/System/SearchForAddressOfSpecialSymbol.cpp
+++ b/contrib/llvm/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -32,7 +32,6 @@ static void *DoSearch(const char* symbolName) {
EXPLICIT_SYMBOL(__ashrdi3);
EXPLICIT_SYMBOL(__cmpdi2);
EXPLICIT_SYMBOL(__divdi3);
- EXPLICIT_SYMBOL(__eprintf);
EXPLICIT_SYMBOL(__fixdfdi);
EXPLICIT_SYMBOL(__fixsfdi);
EXPLICIT_SYMBOL(__fixunsdfdi);
@@ -43,6 +42,16 @@ static void *DoSearch(const char* symbolName) {
EXPLICIT_SYMBOL(__moddi3);
EXPLICIT_SYMBOL(__udivdi3);
EXPLICIT_SYMBOL(__umoddi3);
+
+ // __eprintf is sometimes used for assert() handling on x86.
+ //
+ // FIXME: Currently disabled when using Clang, as we don't always have our
+ // runtime support libraries available.
+#ifndef __clang__
+#ifdef __i386__
+ EXPLICIT_SYMBOL(__eprintf);
+#endif
+#endif
}
#endif
diff --git a/contrib/llvm/lib/System/Signals.cpp b/contrib/llvm/lib/Support/Signals.cpp
index d345b0a..a3af37d 100644
--- a/contrib/llvm/lib/System/Signals.cpp
+++ b/contrib/llvm/lib/Support/Signals.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Signals.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Config/config.h"
namespace llvm {
@@ -30,5 +30,5 @@ using namespace sys;
#include "Unix/Signals.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/Signals.inc"
+#include "Windows/Signals.inc"
#endif
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index da5681c..ef09916 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -13,9 +13,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
namespace {
@@ -47,18 +50,18 @@ SourceMgr::~SourceMgr() {
/// ~0, otherwise it returns the buffer ID of the stacked file.
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc) {
-
- MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str());
+ OwningPtr<MemoryBuffer> NewBuf;
+ MemoryBuffer::getFile(Filename.c_str(), NewBuf);
// If the file didn't exist directly, see if it's in an include path.
for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
std::string IncFile = IncludeDirectories[i] + "/" + Filename;
- NewBuf = MemoryBuffer::getFile(IncFile.c_str());
+ MemoryBuffer::getFile(IncFile.c_str(), NewBuf);
}
if (NewBuf == 0) return ~0U;
- return AddNewSourceBuffer(NewBuf, IncludeLoc);
+ return AddNewSourceBuffer(NewBuf.take(), IncludeLoc);
}
@@ -135,7 +138,7 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
///
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
-SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
const char *Type, bool ShowLine) const {
// First thing to do: find the current buffer containing the specified
@@ -162,27 +165,25 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg,
}
std::string PrintedMsg;
- if (Type) {
- PrintedMsg = Type;
- PrintedMsg += ": ";
- }
- PrintedMsg += Msg;
+ raw_string_ostream OS(PrintedMsg);
+ if (Type)
+ OS << Type << ": ";
+ OS << Msg;
return SMDiagnostic(*this, Loc,
CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
- Loc.getPointer()-LineStart, PrintedMsg,
+ Loc.getPointer()-LineStart, OS.str(),
LineStr, ShowLine);
}
-void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg,
+void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
const char *Type, bool ShowLine) const {
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
- DiagHandler(GetMessage(Loc, Msg, Type, ShowLine),
- DiagContext, DiagLocCookie);
+ DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
return;
}
-
+
raw_ostream &OS = errs();
int CurBuf = FindBufferContainingLoc(Loc);
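The AddIncludeFile change above switches to the error_code/OwningPtr flavor of MemoryBuffer::getFile. A hedged sketch of that caller idiom (the helper name is made up; the overload and its error_code return are assumed from this hunk and the system_error include it adds):

// Sketch: opening a file with the OwningPtr-based getFile used above.
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"

llvm::MemoryBuffer *openOrNull(const char *Path) {
  llvm::OwningPtr<llvm::MemoryBuffer> Buf;
  if (llvm::error_code ec = llvm::MemoryBuffer::getFile(Path, Buf))
    return 0;            // ec.message() describes the failure
  return Buf.take();     // caller now owns the buffer
}
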
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
index e32ab74..f0ed626 100644
--- a/contrib/llvm/lib/Support/Statistic.cpp
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <cstring>
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
index 6f28277..90ec299 100644
--- a/contrib/llvm/lib/Support/StringMap.cpp
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -155,7 +155,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
const char *VStr = (char*)V + ItemSize;
StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength()));
- V2 = V2;
+ (void)V2;
assert(V == V2 && "Didn't find key?");
}
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
index 46f26b2..5398051 100644
--- a/contrib/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -9,6 +9,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/OwningPtr.h"
#include <bitset>
using namespace llvm;
@@ -67,8 +68,9 @@ int StringRef::compare_numeric(StringRef RHS) const {
}
// Compute the edit distance between the two given strings.
-unsigned StringRef::edit_distance(llvm::StringRef Other,
- bool AllowReplacements) {
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+ bool AllowReplacements,
+ unsigned MaxEditDistance) {
// The algorithm implemented below is the "classic"
// dynamic-programming algorithm for computing the Levenshtein
// distance, which is described here:
@@ -83,17 +85,21 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
const unsigned SmallBufferSize = 64;
unsigned SmallBuffer[SmallBufferSize];
- unsigned *Allocated = 0;
+ llvm::OwningArrayPtr<unsigned> Allocated;
unsigned *previous = SmallBuffer;
- if (2*(n + 1) > SmallBufferSize)
- Allocated = previous = new unsigned [2*(n+1)];
+ if (2*(n + 1) > SmallBufferSize) {
+ previous = new unsigned [2*(n+1)];
+ Allocated.reset(previous);
+ }
unsigned *current = previous + (n + 1);
-
- for (unsigned i = 0; i <= n; ++i)
+
+ for (unsigned i = 0; i <= n; ++i)
previous[i] = i;
for (size_type y = 1; y <= m; ++y) {
current[0] = y;
+ unsigned BestThisRow = current[0];
+
for (size_type x = 1; x <= n; ++x) {
if (AllowReplacements) {
current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
@@ -103,16 +109,18 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
else current[x] = min(current[x-1], previous[x]) + 1;
}
+ BestThisRow = min(BestThisRow, current[x]);
}
-
+
+ if (MaxEditDistance && BestThisRow > MaxEditDistance)
+ return MaxEditDistance + 1;
+
unsigned *tmp = current;
current = previous;
previous = tmp;
}
unsigned Result = previous[n];
- delete [] Allocated;
-
return Result;
}
@@ -192,6 +200,21 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
return npos;
}
+/// find_last_of - Find the last character in the string that is in \arg C,
+/// or npos if not found.
+///
+/// Note: O(size() + Chars.size())
+StringRef::size_type StringRef::find_last_of(StringRef Chars,
+ size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
+ for (size_type i = min(From, Length) - 1, e = -1; i != e; --i)
+ if (CharBits.test((unsigned char)Data[i]))
+ return i;
+ return npos;
+}
//===----------------------------------------------------------------------===//
// Helpful Algorithms
@@ -232,10 +255,10 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
// Autosense radix if not specified.
if (Radix == 0)
Radix = GetAutoSenseRadix(Str);
-
+
// Empty strings (after the radix autosense) are invalid.
if (Str.empty()) return true;
-
+
// Parse all the bytes of the string given this radix. Watch for overflow.
Result = 0;
while (!Str.empty()) {
@@ -248,23 +271,23 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
CharVal = Str[0]-'A'+10;
else
return true;
-
+
// If the parsed value is larger than the integer radix, the string is
// invalid.
if (CharVal >= Radix)
return true;
-
+
// Add in this character.
unsigned long long PrevResult = Result;
Result = Result*Radix+CharVal;
-
+
// Check for overflow.
if (Result < PrevResult)
return true;
Str = Str.substr(1);
}
-
+
return false;
}
@@ -275,7 +298,7 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
unsigned long long ULLVal;
-
+
// Handle positive strings first.
if (empty() || front() != '-') {
if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
@@ -285,7 +308,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
Result = ULLVal;
return false;
}
-
+
// Get the positive part of the value.
if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
// Reject values so large they'd overflow as negative signed, but allow
@@ -293,7 +316,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
// on signed overflow.
(long long)-ULLVal > 0)
return true;
-
+
Result = -ULLVal;
return false;
}
@@ -314,7 +337,7 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
return true;
Result = Val;
return false;
-}
+}
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
StringRef Str = *this;
@@ -324,7 +347,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
Radix = GetAutoSenseRadix(Str);
assert(Radix > 1 && Radix <= 36);
-
+
// Empty strings (after the radix autosense) are invalid.
if (Str.empty()) return true;
@@ -348,7 +371,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
if (BitWidth < Result.getBitWidth())
BitWidth = Result.getBitWidth(); // don't shrink the result
else
- Result.zext(BitWidth);
+ Result = Result.zext(BitWidth);
APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
if (!IsPowerOf2Radix) {
@@ -369,12 +392,12 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
CharVal = Str[0]-'A'+10;
else
return true;
-
+
// If the parsed value is larger than the integer radix, the string is
// invalid.
if (CharVal >= Radix)
return true;
-
+
// Add in this character.
if (IsPowerOf2Radix) {
Result <<= Log2Radix;
@@ -387,6 +410,6 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
Str = Str.substr(1);
}
-
+
return false;
}
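The edit_distance change above adds an optional MaxEditDistance cutoff: once every cell in a DP row exceeds the bound, the function gives up and returns MaxEditDistance + 1 instead of finishing the table. A small sketch of a "did you mean" style caller, using the signature shown in the hunk:

// Sketch: typo suggestion using the bounded edit_distance added above.
#include "llvm/ADT/StringRef.h"

bool isCloseEnough(llvm::StringRef Typed, llvm::StringRef Candidate) {
  const unsigned MaxDist = 2;
  unsigned Dist = Typed.edit_distance(Candidate,
                                      /*AllowReplacements=*/true,
                                      /*MaxEditDistance=*/MaxDist);
  return Dist <= MaxDist;   // anything larger means the search bailed early
}
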
diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp
index c8b260c..54b5e97 100644
--- a/contrib/llvm/lib/Support/SystemUtils.cpp
+++ b/contrib/llvm/lib/Support/SystemUtils.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SystemUtils.h"
-#include "llvm/System/Process.h"
-#include "llvm/System/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -23,43 +23,33 @@ bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check,
if (stream_to_check.is_displayed()) {
if (print_warning) {
errs() << "WARNING: You're attempting to print out a bitcode file.\n"
- << "This is inadvisable as it may cause display problems. If\n"
- << "you REALLY want to taste LLVM bitcode first-hand, you\n"
- << "can force output with the `-f' option.\n\n";
+ "This is inadvisable as it may cause display problems. If\n"
+ "you REALLY want to taste LLVM bitcode first-hand, you\n"
+ "can force output with the `-f' option.\n\n";
}
return true;
}
return false;
}
-/// FindExecutable - Find a named executable, giving the argv[0] of program
-/// being executed. This allows us to find another LLVM tool if it is built in
-/// the same directory. If the executable cannot be found, return an
-/// empty string.
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just manipulates the path and doesn't check for executability.
/// @brief Find a named executable.
-#undef FindExecutable // needed on windows :(
-sys::Path llvm::FindExecutable(const std::string &ExeName,
- const char *Argv0, void *MainAddr) {
+sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName,
+ const char *Argv0, void *MainAddr) {
// Check the directory that the calling program is in. We can do
// this if ProgramPath contains at least one / character, indicating that it
// is a relative path to the executable itself.
sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr);
Result.eraseComponent();
+
if (!Result.isEmpty()) {
Result.appendComponent(ExeName);
- if (Result.canExecute())
- return Result;
- // If the path is absolute (and it usually is), call FindProgramByName to
- // allow it to try platform-specific logic, such as appending a .exe suffix
- // on Windows. Don't do this if we somehow have a relative path, because
- // we don't want to go searching the PATH and accidentally find an unrelated
- // version of the program.
- if (Result.isAbsolute()) {
- Result = sys::Program::FindProgramByName(Result.str());
- if (!Result.empty())
- return Result;
- }
+ Result.appendSuffix(sys::Path::GetEXESuffix());
}
- return sys::Path();
+ return Result;
}
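FindExecutable is replaced above by PrependMainExecutablePath, which only builds the sibling-tool path (directory of the running binary, plus ExeName, plus the platform EXE suffix) and no longer checks executability or searches PATH. A hedged usage sketch with the signature from this hunk:

// Sketch: locating a sibling tool next to the running binary.
#include "llvm/Support/SystemUtils.h"

llvm::sys::Path findSiblingTool(const char *Argv0, void *MainAddr) {
  // MainAddr is typically (void*)(intptr_t)main, as in the LLVM tools.
  llvm::sys::Path P = llvm::PrependMainExecutablePath("llc", Argv0, MainAddr);
  // The helper only builds the path; callers still check canExecute() etc.
  return P;
}
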
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
index 5896447..293a5d7 100644
--- a/contrib/llvm/lib/Support/TargetRegistry.cpp
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include <cassert>
using namespace llvm;
diff --git a/contrib/llvm/lib/System/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp
index f6a55a1..6b43048 100644
--- a/contrib/llvm/lib/System/ThreadLocal.cpp
+++ b/contrib/llvm/lib/Support/ThreadLocal.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
-#include "llvm/System/ThreadLocal.h"
+#include "llvm/Support/ThreadLocal.h"
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -77,9 +77,8 @@ void ThreadLocalImpl::removeInstance() {
#elif defined(LLVM_ON_UNIX)
#include "Unix/ThreadLocal.inc"
#elif defined( LLVM_ON_WIN32)
-#include "Win32/ThreadLocal.inc"
+#include "Windows/ThreadLocal.inc"
#else
#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
#endif
#endif
-
diff --git a/contrib/llvm/lib/System/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp
index 466c468..2957956 100644
--- a/contrib/llvm/lib/System/Threading.cpp
+++ b/contrib/llvm/lib/Support/Threading.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Threading.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/Config/config.h"
#include <cassert>
@@ -28,7 +28,7 @@ bool llvm::llvm_start_multithreaded() {
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
-
+
// We fence here to ensure that all initialization is complete BEFORE we
// return from llvm_start_multithreaded().
sys::MemoryFence();
@@ -41,11 +41,11 @@ bool llvm::llvm_start_multithreaded() {
void llvm::llvm_stop_multithreaded() {
#ifdef LLVM_MULTITHREADED
assert(multithreaded_mode && "Not currently multithreaded!");
-
+
// We fence here to insure that all threaded operations are complete BEFORE we
// return from llvm_stop_multithreaded().
sys::MemoryFence();
-
+
multithreaded_mode = false;
delete global_lock;
#endif
@@ -62,3 +62,55 @@ void llvm::llvm_acquire_global_lock() {
void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
+
+#if defined(LLVM_MULTITHREADED) && defined(HAVE_PTHREAD_H)
+#include <pthread.h>
+
+struct ThreadInfo {
+ void (*UserFn)(void *);
+ void *UserData;
+};
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+ ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+ TI->UserFn(TI->UserData);
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ ThreadInfo Info = { Fn, UserData };
+ pthread_attr_t Attr;
+ pthread_t Thread;
+
+ // Construct the attributes object.
+ if (::pthread_attr_init(&Attr) != 0)
+ return;
+
+ // Set the requested stack size, if given.
+ if (RequestedStackSize != 0) {
+ if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+ goto error;
+ }
+
+ // Construct and execute the thread.
+ if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+ goto error;
+
+ // Wait for the thread and clean up.
+ ::pthread_join(Thread, 0);
+
+ error:
+ ::pthread_attr_destroy(&Attr);
+}
+
+#else
+
+// No non-pthread implementation, currently.
+
+void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ (void) RequestedStackSize;
+ Fn(UserData);
+}
+
+#endif
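llvm_execute_on_thread, added above, runs a callback on a freshly created pthread (honoring an optional stack-size request) and joins it before returning; without pthread support it simply calls the function inline. A minimal caller sketch:

// Sketch: running work on a separate, joined thread via the new helper.
#include "llvm/Support/Threading.h"

static void CountWords(void *UserData) {
  unsigned *Out = static_cast<unsigned*>(UserData);
  *Out = 42;  // stand-in for real work that may need a large stack
}

unsigned countOnThread() {
  unsigned Result = 0;
  // Request an 8 MiB stack; 0 would mean "platform default".
  llvm::llvm_execute_on_thread(CountWords, &Result, 8u << 20);
  return Result;   // safe: the helper joins before returning
}
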
diff --git a/contrib/llvm/lib/System/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
index cf4984c..1a0f7bc 100644
--- a/contrib/llvm/lib/System/TimeValue.cpp
+++ b/contrib/llvm/lib/Support/TimeValue.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/TimeValue.h"
+#include "llvm/Support/TimeValue.h"
#include "llvm/Config/config.h"
namespace llvm {
@@ -53,6 +53,5 @@ TimeValue::normalize( void ) {
#include "Unix/TimeValue.inc"
#endif
#ifdef LLVM_ON_WIN32
-#include "Win32/TimeValue.inc"
+#include "Windows/TimeValue.inc"
#endif
-
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index 44ee177..a9ed5ee 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -17,8 +17,8 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Process.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Support/ToolOutputFile.cpp b/contrib/llvm/lib/Support/ToolOutputFile.cpp
new file mode 100644
index 0000000..e7ca927
--- /dev/null
+++ b/contrib/llvm/lib/Support/ToolOutputFile.cpp
@@ -0,0 +1,43 @@
+//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/Signals.h"
+using namespace llvm;
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
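tool_output_file wires signal-based cleanup around an output stream: the file is registered for removal on fatal signals, and is deleted on destruction unless the client marks it as kept. A hedged sketch of the intended pattern (the os() and keep() accessors are assumed from the matching header, which is not part of this diff):

// Sketch: typical tool_output_file usage implied by the class above.
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"

bool writeOutput(const char *OutPath) {
  std::string Err;
  llvm::tool_output_file Out(OutPath, Err, 0 /*Flags*/);
  if (!Err.empty())
    return false;                 // open failed; nothing to clean up
  Out.os() << "hello\n";
  Out.keep();                     // success: don't delete on destruction
  return true;
}
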
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index 3a95b65..36edf6e 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include <cassert>
#include <cstring>
@@ -21,7 +22,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case InvalidArch: return "<invalid>";
case UnknownArch: return "unknown";
-
+
case alpha: return "alpha";
case arm: return "arm";
case bfin: return "bfin";
@@ -29,7 +30,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case mips: return "mips";
case mipsel: return "mipsel";
case msp430: return "msp430";
- case pic16: return "pic16";
case ppc64: return "powerpc64";
case ppc: return "powerpc";
case sparc: return "sparc";
@@ -41,6 +41,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case x86_64: return "x86_64";
case xcore: return "xcore";
case mblaze: return "mblaze";
+ case ptx: return "ptx";
}
return "<invalid>";
@@ -70,7 +71,10 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case x86:
case x86_64: return "x86";
+
case xcore: return "xcore";
+
+ case ptx: return "ptx";
}
}
@@ -97,7 +101,6 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Linux: return "linux";
case Lv2: return "lv2";
case MinGW32: return "mingw32";
- case MinGW64: return "mingw64";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
case Psp: return "psp";
@@ -110,6 +113,18 @@ const char *Triple::getOSTypeName(OSType Kind) {
return "<invalid>";
}
+const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+ switch (Kind) {
+ case UnknownEnvironment: return "unknown";
+ case GNU: return "gnu";
+ case GNUEABI: return "gnueabi";
+ case EABI: return "eabi";
+ case MachO: return "macho";
+ }
+
+ return "<invalid>";
+}
+
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
if (Name == "alpha")
return alpha;
@@ -125,8 +140,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return mipsel;
if (Name == "msp430")
return msp430;
- if (Name == "pic16")
- return pic16;
if (Name == "ppc64")
return ppc64;
if (Name == "ppc")
@@ -149,6 +162,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return x86_64;
if (Name == "xcore")
return xcore;
+ if (Name == "ptx")
+ return ptx;
return UnknownArch;
}
@@ -187,6 +202,9 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
Str == "armv6" || Str == "armv7")
return Triple::arm;
+ if (Str == "ptx")
+ return Triple::ptx;
+
return Triple::UnknownArch;
}
@@ -210,28 +228,29 @@ const char *Triple::getArchNameForAssembler() {
return "arm";
if (Str == "armv4t" || Str == "thumbv4t")
return "armv4t";
- if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" || Str == "thumbv5e")
+ if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
+ || Str == "thumbv5e")
return "armv5";
if (Str == "armv6" || Str == "thumbv6")
return "armv6";
if (Str == "armv7" || Str == "thumbv7")
return "armv7";
+ if (Str == "ptx")
+ return "ptx";
return NULL;
}
//
Triple::ArchType Triple::ParseArch(StringRef ArchName) {
- if (ArchName.size() == 4 && ArchName[0] == 'i' &&
- ArchName[2] == '8' && ArchName[3] == '6' &&
+ if (ArchName.size() == 4 && ArchName[0] == 'i' &&
+ ArchName[2] == '8' && ArchName[3] == '6' &&
ArchName[1] - '3' < 6) // i[3-9]86
return x86;
else if (ArchName == "amd64" || ArchName == "x86_64")
return x86_64;
else if (ArchName == "bfin")
return bfin;
- else if (ArchName == "pic16")
- return pic16;
else if (ArchName == "powerpc")
return ppc;
else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
@@ -266,6 +285,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
return tce;
else if (ArchName == "xcore")
return xcore;
+ else if (ArchName == "ptx")
+ return ptx;
else
return UnknownArch;
}
@@ -296,8 +317,6 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
return Lv2;
else if (OSName.startswith("mingw32"))
return MinGW32;
- else if (OSName.startswith("mingw64"))
- return MinGW64;
else if (OSName.startswith("netbsd"))
return NetBSD;
else if (OSName.startswith("openbsd"))
@@ -316,12 +335,26 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
return UnknownOS;
}
+Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
+ if (EnvironmentName.startswith("eabi"))
+ return EABI;
+ else if (EnvironmentName.startswith("gnueabi"))
+ return GNUEABI;
+ else if (EnvironmentName.startswith("gnu"))
+ return GNU;
+ else if (EnvironmentName.startswith("macho"))
+ return MachO;
+ else
+ return UnknownEnvironment;
+}
+
void Triple::Parse() const {
assert(!isInitialized() && "Invalid parse call.");
Arch = ParseArch(getArchName());
Vendor = ParseVendor(getVendorName());
OS = ParseOS(getOSName());
+ Environment = ParseEnvironment(getEnvironmentName());
assert(isInitialized() && "Failed to initialize!");
}
@@ -348,24 +381,28 @@ std::string Triple::normalize(StringRef Str) {
OSType OS = UnknownOS;
if (Components.size() > 2)
OS = ParseOS(Components[2]);
+ EnvironmentType Environment = UnknownEnvironment;
+ if (Components.size() > 3)
+ Environment = ParseEnvironment(Components[3]);
// Note which components are already in their final position. These will not
// be moved.
- bool Found[3];
+ bool Found[4];
Found[0] = Arch != UnknownArch;
Found[1] = Vendor != UnknownVendor;
Found[2] = OS != UnknownOS;
+ Found[3] = Environment != UnknownEnvironment;
// If they are not there already, permute the components into their canonical
// positions by seeing if they parse as a valid architecture, and if so moving
// the component to the architecture position etc.
- for (unsigned Pos = 0; Pos != 3; ++Pos) {
+ for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
if (Found[Pos])
continue; // Already in the canonical position.
for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
// Do not reparse any components that already matched.
- if (Idx < 3 && Found[Idx])
+ if (Idx < array_lengthof(Found) && Found[Idx])
continue;
// Does this component parse as valid for the target position?
@@ -386,6 +423,10 @@ std::string Triple::normalize(StringRef Str) {
OS = ParseOS(Comp);
Valid = OS != UnknownOS;
break;
+ case 3:
+ Environment = ParseEnvironment(Comp);
+ Valid = Environment != UnknownEnvironment;
+ break;
}
if (!Valid)
continue; // Nope, try the next component.
@@ -404,7 +445,7 @@ std::string Triple::normalize(StringRef Str) {
// components to the right.
for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
// Skip over any fixed components.
- while (i < 3 && Found[i]) ++i;
+ while (i < array_lengthof(Found) && Found[i]) ++i;
// Place the component at the new position, getting the component
// that was at this position - it will be moved right.
std::swap(CurrentComponent, Components[i]);
@@ -416,22 +457,23 @@ std::string Triple::normalize(StringRef Str) {
do {
// Insert one empty component at Idx.
StringRef CurrentComponent(""); // The empty component.
- for (unsigned i = Idx; i < Components.size(); ++i) {
- // Skip over any fixed components.
- while (i < 3 && Found[i]) ++i;
+ for (unsigned i = Idx; i < Components.size();) {
// Place the component at the new position, getting the component
// that was at this position - it will be moved right.
std::swap(CurrentComponent, Components[i]);
// If it was placed on top of an empty component then we are done.
if (CurrentComponent.empty())
break;
+ // Advance to the next component, skipping any fixed components.
+ while (++i < array_lengthof(Found) && Found[i])
+ ;
}
// The last component was pushed off the end - append it.
if (!CurrentComponent.empty())
Components.push_back(CurrentComponent);
// Advance Idx to the component's new position.
- while (++Idx < 3 && Found[Idx]) {}
+ while (++Idx < array_lengthof(Found) && Found[Idx]) {}
} while (Idx < Pos); // Add more until the final position is reached.
}
assert(Pos < Components.size() && Components[Pos] == Comp &&
@@ -482,17 +524,17 @@ StringRef Triple::getOSAndEnvironmentName() const {
static unsigned EatNumber(StringRef &Str) {
assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
unsigned Result = Str[0]-'0';
-
+
// Eat the digit.
Str = Str.substr(1);
-
+
// Handle "darwin11".
if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') {
Result = Result*10 + (Str[0] - '0');
// Eat the digit.
Str = Str.substr(1);
}
-
+
return Result;
}
@@ -505,10 +547,10 @@ void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
assert(getOS() == Darwin && "Not a darwin target triple!");
StringRef OSName = getOSName();
assert(OSName.startswith("darwin") && "Unknown darwin target triple!");
-
+
// Strip off "darwin".
OSName = OSName.substr(6);
-
+
Maj = Min = Revision = 0;
if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
@@ -517,27 +559,27 @@ void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min,
// The major version is the first digit.
Maj = EatNumber(OSName);
if (OSName.empty()) return;
-
+
// Handle minor version: 10.4.9 -> darwin8.9.
if (OSName[0] != '.')
return;
-
+
// Eat the '.'.
OSName = OSName.substr(1);
if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
return;
-
+
Min = EatNumber(OSName);
if (OSName.empty()) return;
// Handle revision darwin8.9.1
if (OSName[0] != '.')
return;
-
+
// Eat the '.'.
OSName = OSName.substr(1);
-
+
if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9')
return;
@@ -561,6 +603,10 @@ void Triple::setOS(OSType Kind) {
setOSName(getOSTypeName(Kind));
}
+void Triple::setEnvironment(EnvironmentType Kind) {
+ setEnvironmentName(getEnvironmentTypeName(Kind));
+}
+
void Triple::setArchName(StringRef Str) {
// Work around a miscompilation bug for Twines in gcc 4.0.3.
SmallString<64> Triple;
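Triple gains a fourth, environment component here (gnu, gnueabi, eabi, macho), parsed in Parse() and shuffled into place by normalize(). A hedged sketch of what that enables (getEnvironment() is assumed from the matching header; only ParseEnvironment and normalize appear in this diff):

// Sketch: the environment slot added to Triple above (header accessor assumed).
#include "llvm/ADT/Triple.h"
#include <string>

bool targetsGNUEABI(llvm::StringRef TripleStr) {
  llvm::Triple T(TripleStr);                 // e.g. "arm-unknown-linux-gnueabi"
  return T.getEnvironment() == llvm::Triple::GNUEABI;
}

std::string canonicalize(llvm::StringRef TripleStr) {
  // normalize() now also recognizes the environment component when it
  // permutes out-of-order triples into arch-vendor-os-environment order.
  return llvm::Triple::normalize(TripleStr);
}
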
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
index b3ea013..75cea29 100644
--- a/contrib/llvm/lib/Support/Twine.cpp
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -30,22 +30,42 @@ StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
return StringRef(Out.data(), Out.size());
}
-void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+ if (isUnary()) {
+ switch (getLHSKind()) {
+ case CStringKind:
+ // Already null terminated, yay!
+ return StringRef(static_cast<const char*>(LHS));
+ case StdStringKind: {
+ const std::string *str = static_cast<const std::string*>(LHS);
+ return StringRef(str->c_str(), str->size());
+ }
+ default:
+ break;
+ }
+ }
+ toVector(Out);
+ Out.push_back(0);
+ Out.pop_back();
+ return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
NodeKind Kind) const {
switch (Kind) {
case Twine::NullKind: break;
case Twine::EmptyKind: break;
case Twine::TwineKind:
- static_cast<const Twine*>(Ptr)->print(OS);
+ static_cast<const Twine*>(Ptr)->print(OS);
break;
- case Twine::CStringKind:
- OS << static_cast<const char*>(Ptr);
+ case Twine::CStringKind:
+ OS << static_cast<const char*>(Ptr);
break;
case Twine::StdStringKind:
- OS << *static_cast<const std::string*>(Ptr);
+ OS << *static_cast<const std::string*>(Ptr);
break;
case Twine::StringRefKind:
- OS << *static_cast<const StringRef*>(Ptr);
+ OS << *static_cast<const StringRef*>(Ptr);
break;
case Twine::DecUIKind:
OS << (unsigned)(uintptr_t)Ptr;
@@ -71,7 +91,7 @@ void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
}
}
-void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
NodeKind Kind) const {
switch (Kind) {
case Twine::NullKind:
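A usage sketch for the toNullTerminatedStringRef() helper added above; the wrapper name and the open(2) call site are illustrative, and the headers assume the usual ADT locations. The point is that the returned StringRef is safe to hand straight to C APIs without an extra std::string copy.

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include <fcntl.h>
using namespace llvm;

int OpenTwinePath(const Twine &Path) {
  SmallString<128> Storage;
  // For a plain C string or std::string Twine no copy is made at all;
  // otherwise the Twine is rendered into Storage and null terminated.
  StringRef P = Path.toNullTerminatedStringRef(Storage);
  return ::open(P.begin(), O_RDONLY);
}

A caller can then write OpenTwinePath(Twine(Dir) + "/" + Name) and let the Twine machinery decide whether any concatenation buffer is needed at all.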
diff --git a/contrib/llvm/lib/System/Unix/Host.inc b/contrib/llvm/lib/Support/Unix/Host.inc
index c76d6a4..ed74b67 100644
--- a/contrib/llvm/lib/System/Unix/Host.inc
+++ b/contrib/llvm/lib/Support/Unix/Host.inc
@@ -1,4 +1,4 @@
- //===- llvm/System/Unix/Host.inc -------------------------------*- C++ -*-===//
+ //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "Unix.h"
#include <sys/utsname.h>
+#include <cctype>
#include <string>
using namespace llvm;
@@ -39,7 +40,7 @@ std::string sys::getHostTriple() {
StringRef HostTripleString(LLVM_HOSTTRIPLE);
std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
-
+
// Normalize the arch, since the host triple may not actually match the host.
std::string Arch = ArchSplit.first;
@@ -77,16 +78,16 @@ std::string sys::getHostTriple() {
Triple += ArchSplit.second;
// Force i<N>86 to i386.
- if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+ if (Triple[0] == 'i' && isdigit(Triple[1]) &&
Triple[2] == '8' && Triple[3] == '6')
Triple[1] = '3';
// On darwin, we want to update the version to match that of the
- // host.
+ // host.
std::string::size_type DarwinDashIdx = Triple.find("-darwin");
if (DarwinDashIdx != std::string::npos) {
Triple.resize(DarwinDashIdx + strlen("-darwin"));
-
+
// Only add the major part of the os version.
std::string Version = getOSVersion();
Triple += Version.substr(0, Version.find('.'));
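Illustrative only: what the normalization above produces for two hypothetical build-time host triples (the darwin example assumes a 10.6 host, where getOSVersion() starts with "10"); the header location reflects the System-to-Support rename and is an assumption.

#include "llvm/Support/Host.h"
#include <string>

// "i686-pc-linux-gnu"    -> "i386-pc-linux-gnu"    (i<N>86 forced to i386)
// "x86_64-apple-darwin9" -> "x86_64-apple-darwin10" (major OS version of the
//                           running host replaces the build-time one)
std::string HostTripleSketch() { return llvm::sys::getHostTriple(); }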
diff --git a/contrib/llvm/lib/System/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
index 1b038f9..4312d67 100644
--- a/contrib/llvm/lib/System/Unix/Memory.inc
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -1,10 +1,10 @@
//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines some functions for various memory management utilities.
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "Unix.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
@@ -28,7 +28,7 @@
/// to emit code to the memory then jump to it. Getting this type of memory
/// is very OS specific.
///
-llvm::sys::MemoryBlock
+llvm::sys::MemoryBlock
llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
std::string *ErrMsg) {
if (NumBytes == 0) return MemoryBlock();
@@ -54,7 +54,7 @@ llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
#endif
;
- void* start = NearBlock ? (unsigned char*)NearBlock->base() +
+ void* start = NearBlock ? (unsigned char*)NearBlock->base() +
NearBlock->size() : 0;
#if defined(__APPLE__) && defined(__arm__)
diff --git a/contrib/llvm/lib/System/Unix/Mutex.inc b/contrib/llvm/lib/Support/Unix/Mutex.inc
index 4a5e28d..fe6b170 100644
--- a/contrib/llvm/lib/System/Unix/Mutex.inc
+++ b/contrib/llvm/lib/Support/Unix/Mutex.inc
@@ -1,10 +1,10 @@
-//===- llvm/System/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
-//
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific (non-pthread) Mutex class.
@@ -28,13 +28,13 @@ MutexImpl::~MutexImpl()
{
}
-bool
+bool
MutexImpl::release()
{
return true;
}
-bool
+bool
MutexImpl::tryacquire( void )
{
return true;
diff --git a/contrib/llvm/lib/System/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
index 47e4d1a..0f6e800 100644
--- a/contrib/llvm/lib/System/Unix/Path.inc
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -1,4 +1,4 @@
-//===- llvm/System/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -78,6 +78,10 @@ using namespace sys;
const char sys::PathSeparator = ':';
+StringRef Path::GetEXESuffix() {
+ return StringRef();
+}
+
Path::Path(StringRef p)
: path(p) {}
@@ -92,10 +96,12 @@ Path::operator=(StringRef that) {
bool
Path::isValid() const {
- // Check some obvious things
- if (path.empty())
- return false;
- return path.length() < MAXPATHLEN;
+ // Empty paths are considered invalid here.
+ // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+ // LLVM manipulates Paths with fixed-size arrays, and if the OS can't
+ // handle names longer than some limit, it'll report this on demand using
+ // ENAMETOOLONG.
+ return !path.empty();
}
bool
@@ -113,18 +119,6 @@ Path::isAbsolute() const {
return path[0] == '/';
}
-void Path::makeAbsolute() {
- if (isAbsolute())
- return;
-
- Path CWD = Path::GetCurrentDirectory();
- assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!");
-
- CWD.appendComponent(path);
-
- path = CWD.str();
-}
-
Path
Path::GetRootDirectory() {
Path result;
@@ -137,25 +131,20 @@ Path::GetTemporaryDirectory(std::string *ErrMsg) {
#if defined(HAVE_MKDTEMP)
// The best way is with mkdtemp, but that's not available on many systems;
// Linux and FreeBSD have it. Others probably won't.
- char pathname[MAXPATHLEN];
- strcpy(pathname,"/tmp/llvm_XXXXXX");
+ char pathname[] = "/tmp/llvm_XXXXXX";
if (0 == mkdtemp(pathname)) {
MakeErrMsg(ErrMsg,
- std::string(pathname) + ": can't create temporary directory");
+ std::string(pathname) + ": can't create temporary directory");
return Path();
}
- Path result;
- result.set(pathname);
- assert(result.isValid() && "mkdtemp didn't create a valid pathname!");
- return result;
+ return Path(pathname);
#elif defined(HAVE_MKSTEMP)
// If no mkdtemp is available, mkstemp can be used to create a temporary file
// which is then removed and created as a directory. We prefer this over
// mktemp because of mktemp's inherent security and threading risks. We still
// have a slight race condition from the time the temporary file is created to
// the time it is re-created as a directory.
- char pathname[MAXPATHLEN];
- strcpy(pathname, "/tmp/llvm_XXXXXX");
+ char pathname[] = "/tmp/llvm_XXXXXX";
int fd = 0;
if (-1 == (fd = mkstemp(pathname))) {
MakeErrMsg(ErrMsg,
@@ -169,18 +158,14 @@ Path::GetTemporaryDirectory(std::string *ErrMsg) {
std::string(pathname) + ": can't create temporary directory");
return Path();
}
- Path result;
- result.set(pathname);
- assert(result.isValid() && "mkstemp didn't create a valid pathname!");
- return result;
+ return Path(pathname);
#elif defined(HAVE_MKTEMP)
// If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
// mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
// implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
// the XXXXXX with the pid of the process and a letter. That leads to only
// twenty six temporary files that can be generated.
- char pathname[MAXPATHLEN];
- strcpy(pathname, "/tmp/llvm_XXXXXX");
+ char pathname[] = "/tmp/llvm_XXXXXX";
char *TmpName = ::mktemp(pathname);
if (TmpName == 0) {
MakeErrMsg(ErrMsg,
@@ -192,10 +177,7 @@ Path::GetTemporaryDirectory(std::string *ErrMsg) {
std::string(TmpName) + ": can't create temporary directory");
return Path();
}
- Path result;
- result.set(TmpName);
- assert(result.isValid() && "mktemp didn't create a valid pathname!");
- return result;
+ return Path(TmpName);
#else
// This is the worst case implementation. tempnam(3) leaks memory unless its
// on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
@@ -216,10 +198,7 @@ Path::GetTemporaryDirectory(std::string *ErrMsg) {
std::string(pathname) + ": can't create temporary directory");
return Path();
}
- Path result;
- result.set(pathname);
- assert(result.isValid() && "mkstemp didn't create a valid pathname!");
- return result;
+ return Path(pathname);
#endif
}
@@ -263,12 +242,11 @@ Path::GetLLVMDefaultConfigDir() {
Path
Path::GetUserHomeDirectory() {
const char* home = getenv("HOME");
- if (home) {
- Path result;
- if (result.set(home))
- return result;
- }
- return GetRootDirectory();
+ Path result;
+ if (home && result.set(home))
+ return result;
+ result.set("/");
+ return result;
}
Path
@@ -282,7 +260,8 @@ Path::GetCurrentDirectory() {
return Path(pathname);
}
-#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
+#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -348,18 +327,19 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
if (_NSGetExecutablePath(exe_path, &size) == 0) {
char link_path[MAXPATHLEN];
if (realpath(exe_path, link_path))
- return Path(std::string(link_path));
+ return Path(link_path);
}
-#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__minix)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
- return Path(std::string(exe_path));
+ return Path(exe_path);
#elif defined(__linux__) || defined(__CYGWIN__)
char exe_path[MAXPATHLEN];
ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path));
if (len >= 0)
- return Path(std::string(exe_path, len));
+ return Path(StringRef(exe_path, len));
#elif defined(HAVE_DLFCN_H)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
@@ -371,7 +351,9 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
// the actual executable.
char link_path[MAXPATHLEN];
if (realpath(DLInfo.dli_fname, link_path))
- return Path(std::string(link_path));
+ return Path(link_path);
+#else
+#error GetMainExecutable is not implemented on this host yet.
#endif
return Path();
}
@@ -437,9 +419,18 @@ Path::isDirectory() const {
struct stat buf;
if (0 != stat(path.c_str(), &buf))
return false;
- return buf.st_mode & S_IFDIR ? true : false;
+ return ((buf.st_mode & S_IFMT) == S_IFDIR) ? true : false;
+}
+
+bool
+Path::isSymLink() const {
+ struct stat buf;
+ if (0 != lstat(path.c_str(), &buf))
+ return false;
+ return S_ISLNK(buf.st_mode);
}
+
bool
Path::canRead() const {
return 0 == access(path.c_str(), R_OK);
@@ -590,12 +581,7 @@ bool
Path::set(StringRef a_path) {
if (a_path.empty())
return false;
- std::string save(path);
path = a_path;
- if (!isValid()) {
- path = save;
- return false;
- }
return true;
}
@@ -603,14 +589,9 @@ bool
Path::appendComponent(StringRef name) {
if (name.empty())
return false;
- std::string save(path);
if (!lastIsSlash(path))
path += '/';
path += name;
- if (!isValid()) {
- path = save;
- return false;
- }
return true;
}
@@ -632,20 +613,7 @@ Path::eraseComponent() {
}
bool
-Path::appendSuffix(StringRef suffix) {
- std::string save(path);
- path.append(".");
- path.append(suffix);
- if (!isValid()) {
- path = save;
- return false;
- }
- return true;
-}
-
-bool
Path::eraseSuffix() {
- std::string save = path;
size_t dotpos = path.rfind('.',path.size());
size_t slashpos = path.rfind('/',path.size());
if (dotpos != std::string::npos) {
@@ -654,8 +622,6 @@ Path::eraseSuffix() {
return true;
}
}
- if (!isValid())
- path = save;
return false;
}
@@ -690,8 +656,7 @@ static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
bool
Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
// Get a writeable copy of the path name
- char pathname[MAXPATHLEN];
- path.copy(pathname,MAXPATHLEN);
+ std::string pathname(path);
// Null-terminate the last component
size_t lastchar = path.length() - 1 ;
@@ -699,11 +664,10 @@ Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) {
if (pathname[lastchar] != '/')
++lastchar;
- pathname[lastchar] = 0;
+ pathname[lastchar] = '\0';
- if (createDirectoryHelper(pathname, pathname+lastchar, create_parents))
- return MakeErrMsg(ErrMsg,
- std::string(pathname) + ": can't create directory");
+ if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents))
+ return MakeErrMsg(ErrMsg, pathname + ": can't create directory");
return false;
}
@@ -768,17 +732,15 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
}
// Otherwise, try to just remove the one directory.
- char pathname[MAXPATHLEN];
- path.copy(pathname, MAXPATHLEN);
+ std::string pathname(path);
size_t lastchar = path.length() - 1;
if (pathname[lastchar] == '/')
- pathname[lastchar] = 0;
+ pathname[lastchar] = '\0';
else
- pathname[lastchar+1] = 0;
+ pathname[lastchar+1] = '\0';
- if (rmdir(pathname) != 0)
- return MakeErrMsg(ErrStr,
- std::string(pathname) + ": can't erase directory");
+ if (rmdir(pathname.c_str()) != 0)
+ return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
return false;
}
@@ -851,7 +813,8 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
bool
Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
- if (reuse_current && !exists())
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
return false; // File doesn't exist already, just use it!
// Append an XXXXXX pattern to the end of the file for use with mkstemp,
@@ -862,7 +825,8 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
Buf.resize(path.size()+8);
char *FNBuffer = &Buf[0];
path.copy(FNBuffer,path.size());
- if (isDirectory())
+ bool isdir;
+ if (!fs::is_directory(path, isdir) && isdir)
strcpy(FNBuffer+path.size(), "/XXXXXX");
else
strcpy(FNBuffer+path.size(), "-XXXXXX");
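A small caller-side sketch of the relaxed Path semantics in this file: set() and appendComponent() no longer re-validate against MAXPATHLEN, and isSymLink() is new. The names and the post-rename header location are assumptions for illustration.

#include "llvm/Support/Path.h"
using namespace llvm;

void UnixPathSketch() {
  sys::Path P("/tmp");
  P.appendComponent("llvm_demo");   // succeeds for any non-empty component now
  if (P.isSymLink()) {              // new lstat()-based query
    // ... treat symlinks specially ...
  }
}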
diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc
new file mode 100644
index 0000000..03ff283
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,507 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+# include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+# include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+# include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+using namespace llvm;
+
+namespace {
+ /// This class automatically closes the given file descriptor when it goes out
+ /// of scope. You can take back explicit ownership of the file descriptor by
+ /// calling take(). The destructor does not verify that close was successful.
+ /// Therefore, never allow this class to call close on a file descriptor that
+ /// has been read from or written to.
+ struct AutoFD {
+ int FileDescriptor;
+
+ AutoFD(int fd) : FileDescriptor(fd) {}
+ ~AutoFD() {
+ if (FileDescriptor >= 0)
+ ::close(FileDescriptor);
+ }
+
+ int take() {
+ int ret = FileDescriptor;
+ FileDescriptor = -1;
+ return ret;
+ }
+
+ operator int() const {return FileDescriptor;}
+ };
+
+ error_code TempDir(SmallVectorImpl<char> &result) {
+ // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+ const char *dir = 0;
+ (dir = std::getenv("TMPDIR" )) ||
+ (dir = std::getenv("TMP" )) ||
+ (dir = std::getenv("TEMP" )) ||
+ (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+ (dir = P_tmpdir) ||
+#endif
+ (dir = "/tmp");
+
+ result.clear();
+ StringRef d(dir);
+ result.append(d.begin(), d.end());
+ return success;
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ result.reserve(MAXPATHLEN);
+
+ while (true) {
+ if (::getcwd(result.data(), result.capacity()) == 0) {
+ // See if there was a real error.
+ if (errno != errc::not_enough_memory)
+ return error_code(errno, system_category());
+ // Otherwise there just wasn't enough space.
+ result.reserve(result.capacity() * 2);
+ } else
+ break;
+ }
+
+ result.set_size(strlen(result.data()));
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ const size_t buf_sz = 32768;
+ char buffer[buf_sz];
+ int from_file = -1, to_file = -1;
+
+ // Open from.
+ if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+ return error_code(errno, system_category());
+ AutoFD from_fd(from_file);
+
+ // Stat from.
+ struct stat from_stat;
+ if (::stat(f.begin(), &from_stat) != 0)
+ return error_code(errno, system_category());
+
+ // Setup to flags.
+ int to_flags = O_CREAT | O_WRONLY;
+ if (copt == copy_option::fail_if_exists)
+ to_flags |= O_EXCL;
+
+ // Open to.
+ if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+ return error_code(errno, system_category());
+ AutoFD to_fd(to_file);
+
+ // Copy!
+ ssize_t sz, sz_read = 1, sz_write;
+ while (sz_read > 0 &&
+ (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+ // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+ // Marc Rochkind, Addison-Wesley, 2004, page 94
+ sz_write = 0;
+ do {
+ if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+ sz_read = sz; // cause read loop termination.
+ break; // error.
+ }
+ sz_write += sz;
+ } while (sz_write < sz_read);
+ }
+
+ // After all the file operations above the return value of close actually
+ // matters.
+ if (::close(from_fd.take()) < 0) sz_read = -1;
+ if (::close(to_fd.take()) < 0) sz_read = -1;
+
+ // Check for errors.
+ if (sz_read < 0)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+ if (errno != errc::file_exists)
+ return error_code(errno, system_category());
+ existed = true;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::link(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::symlink(t.begin(), f.begin()) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::remove(p.begin()) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ existed = false;
+ } else
+ existed = true;
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::rename(f.begin(), t.begin()) == -1) {
+ // If it's a cross device link, copy then delete, otherwise return the error
+ if (errno == EXDEV) {
+ if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists))
+ return ec;
+ bool Existed;
+ if (error_code ec = remove(from, Existed))
+ return ec;
+ } else
+ return error_code(errno, system_category());
+ }
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::truncate(p.begin(), size) == -1)
+ return error_code(errno, system_category());
+
+ return success;
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1) {
+ if (errno != errc::no_such_file_or_directory)
+ return error_code(errno, system_category());
+ result = false;
+ } else
+ result = true;
+
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toNullTerminatedStringRef(a_storage);
+ StringRef b = B.toNullTerminatedStringRef(b_storage);
+
+ struct stat stat_a, stat_b;
+ int error_b = ::stat(b.begin(), &stat_b);
+ int error_a = ::stat(a.begin(), &stat_a);
+
+ // If both are invalid, it's an error. If only one is, the result is false.
+ if (error_a != 0 || error_b != 0) {
+ if (error_a == error_b)
+ return error_code(errno, system_category());
+ result = false;
+ } else {
+ result =
+ stat_a.st_dev == stat_b.st_dev &&
+ stat_a.st_ino == stat_b.st_ino;
+ }
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) == -1)
+ return error_code(errno, system_category());
+ if (!S_ISREG(status.st_mode))
+ return make_error_code(errc::operation_not_permitted);
+
+ result = status.st_size;
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ struct stat status;
+ if (::stat(p.begin(), &status) != 0) {
+ error_code ec(errno, system_category());
+ if (ec == errc::no_such_file_or_directory)
+ result = file_status(file_type::file_not_found);
+ else
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ if (S_ISDIR(status.st_mode))
+ result = file_status(file_type::directory_file);
+ else if (S_ISREG(status.st_mode))
+ result = file_status(file_type::regular_file);
+ else if (S_ISBLK(status.st_mode))
+ result = file_status(file_type::block_file);
+ else if (S_ISCHR(status.st_mode))
+ result = file_status(file_type::character_file);
+ else if (S_ISFIFO(status.st_mode))
+ result = file_status(file_type::fifo_file);
+ else if (S_ISSOCK(status.st_mode))
+ result = file_status(file_type::socket_file);
+ else
+ result = file_status(file_type::type_unknown);
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ SmallString<128> Model;
+ model.toVector(Model);
+ // Null terminate.
+ Model.c_str();
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(Twine(Model));
+ if (!absolute) {
+ SmallString<128> TDir;
+ if (error_code ec = TempDir(TDir)) return ec;
+ path::append(TDir, Twine(Model));
+ Model.swap(TDir);
+ }
+
+ // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallString<128> RandomPath;
+ RandomPath.reserve(Model.size() + 1);
+ ::srand(::time(NULL));
+
+retry_random_path:
+ // This is opened here instead of above to make it easier to track when to
+ // close it. Collisions should be rare enough for the possible extra syscalls
+ // not to matter.
+ FILE *RandomSource = ::fopen("/dev/urandom", "r");
+ RandomPath.set_size(0);
+ for (SmallVectorImpl<char>::const_iterator i = Model.begin(),
+ e = Model.end(); i != e; ++i) {
+ if (*i == '%') {
+ char val = 0;
+ if (RandomSource)
+ val = fgetc(RandomSource);
+ else
+ val = ::rand();
+ RandomPath.push_back("0123456789abcdef"[val & 15]);
+ } else
+ RandomPath.push_back(*i);
+ }
+
+ if (RandomSource)
+ ::fclose(RandomSource);
+
+ // Try to open + create the file.
+retry_open_create:
+ int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
+ if (RandomFD == -1) {
+ // If the file existed, try again, otherwise, error.
+ if (errno == errc::file_exists)
+ goto retry_random_path;
+ // The path prefix doesn't exist.
+ if (errno == errc::no_such_file_or_directory) {
+ StringRef p(RandomPath.begin(), RandomPath.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // Don't try to create network paths.
+ if (i->size() > 2 && (*i)[0] == '/' &&
+ (*i)[1] == '/' &&
+ (*i)[2] != '/')
+ return make_error_code(errc::no_such_file_or_directory);
+ if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+ return error_code(errno, system_category());
+ }
+ }
+ goto retry_open_create;
+ }
+ return error_code(errno, system_category());
+ }
+
+ // Make the path absolute.
+ char real_path_buff[PATH_MAX + 1];
+ if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+ int error = errno;
+ ::close(RandomFD);
+ ::unlink(RandomPath.c_str());
+ return error_code(error, system_category());
+ }
+
+ result_path.clear();
+ StringRef d(real_path_buff);
+ result_path.append(d.begin(), d.end());
+
+ result_fd = RandomFD;
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallString<128> path_null(path);
+ DIR *directory = ::opendir(path_null.c_str());
+ if (directory == 0)
+ return error_code(errno, system_category());
+
+ it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+ // Add something for replace_filename to replace.
+ path::append(path_null, ".");
+ it.CurrentEntry = directory_entry(path_null.str());
+ return directory_iterator_increment(it);
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle)
+ ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ errno = 0;
+ dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+ if (cur_dir == 0 && errno != 0) {
+ return error_code(errno, system_category());
+ } else if (cur_dir != 0) {
+ StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+ if ((name.size() == 1 && name[0] == '.') ||
+ (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+ return directory_iterator_increment(it);
+ it.CurrentEntry.replace_filename(name);
+ } else
+ return directory_iterator_destruct(it);
+
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> PathStorage;
+ StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+ result.set_size(0);
+
+ // Open path.
+ std::FILE *file = std::fopen(Path.data(), "rb");
+ if (file == 0)
+ return error_code(errno, system_category());
+
+ // Reserve storage.
+ result.reserve(len);
+
+ // Read magic!
+ size_t size = std::fread(result.data(), 1, len, file);
+ if (std::ferror(file) != 0) {
+ std::fclose(file);
+ return error_code(errno, system_category());
+ } else if (size != result.size()) {
+ if (std::feof(file) != 0) {
+ std::fclose(file);
+ result.set_size(size);
+ return make_error_code(errc::value_too_large);
+ }
+ }
+ std::fclose(file);
+ result.set_size(len);
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
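To show how the new error_code-based API above is meant to be consumed, a minimal sketch follows; the file names are placeholders, and the headers (FileSystem.h, system_error.h) are the ones this change appears to pair with elsewhere in the diff.

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/system_error.h"
#include <string>
using namespace llvm;

bool PathV2Sketch(std::string &Err) {
  bool Existed;
  if (error_code ec = sys::fs::create_directory("/tmp/llvm-demo", Existed)) {
    Err = ec.message();
    return false;
  }
  if (error_code ec = sys::fs::copy_file("/tmp/in.bc", "/tmp/llvm-demo/in.bc",
                                  sys::fs::copy_option::overwrite_if_exists)) {
    Err = ec.message();
    return false;
  }
  uint64_t Size;
  if (error_code ec = sys::fs::file_size("/tmp/llvm-demo/in.bc", Size)) {
    Err = ec.message();
    return false;
  }
  return true;   // Existed and Size now describe what happened on disk
}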
diff --git a/contrib/llvm/lib/System/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
index cf6a47a..5cdb11c 100644
--- a/contrib/llvm/lib/System/Unix/Process.inc
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -1,10 +1,10 @@
//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the generic Unix implementation of the Process class.
@@ -41,8 +41,8 @@
using namespace llvm;
using namespace sys;
-unsigned
-Process::GetPageSize()
+unsigned
+Process::GetPageSize()
{
#if defined(__CYGWIN__)
// On Cygwin, getpagesize() returns 64k but the page size for the purposes of
@@ -104,20 +104,20 @@ Process::GetTotalMemoryUsage()
}
void
-Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
TimeValue& sys_time)
{
elapsed = TimeValue::now();
#if defined(HAVE_GETRUSAGE)
struct rusage usage;
::getrusage(RUSAGE_SELF, &usage);
- user_time = TimeValue(
- static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
- static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+ user_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
TimeValue::NANOSECONDS_PER_MICROSECOND ) );
- sys_time = TimeValue(
- static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
- static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+ sys_time = TimeValue(
+ static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+ static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
TimeValue::NANOSECONDS_PER_MICROSECOND ) );
#else
#warning Cannot get usage times on this platform
@@ -159,14 +159,14 @@ void Process::PreventCoreFiles() {
exception_port_t OriginalPorts[EXC_TYPES_COUNT];
exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
- kern_return_t err =
+ kern_return_t err =
task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
&Count, OriginalPorts, OriginalBehaviors,
OriginalFlavors);
if (err == KERN_SUCCESS) {
// replace each with MACH_PORT_NULL.
for (unsigned i = 0; i != Count; ++i)
- task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+ task_set_exception_ports(mach_task_self(), OriginalMasks[i],
MACH_PORT_NULL, OriginalBehaviors[i],
OriginalFlavors[i]);
}
diff --git a/contrib/llvm/lib/System/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index 0209f5a..1104bc7 100644
--- a/contrib/llvm/lib/System/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -1,4 +1,4 @@
-//===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include <llvm/Config/config.h>
+#include "llvm/Support/FileSystem.h"
#include "Unix.h"
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
@@ -66,8 +67,8 @@ Program::FindProgramByName(const std::string& progName) {
if (progName.find('/') != std::string::npos)
return temp;
- // At this point, the file name does not contain slashes. Search for it
- // through the directories specified in the PATH environment variable.
+ // At this point, the file name is valid and does not contain slashes. Search
+ // for it through the directories specified in the PATH environment variable.
// Get the path. If its empty, we can't do anything to find it.
const char *PathStr = getenv("PATH");
@@ -196,7 +197,7 @@ Program::Execute(const Path &path, const char **args, const char **envp,
*redirects[1] != *redirects[2]) {
// Just redirect stderr
if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
- } else {
+ } else {
// If stdout and stderr should go to the same place, redirect stderr
// to the FD already open for stdout.
if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
@@ -212,25 +213,21 @@ Program::Execute(const Path &path, const char **args, const char **envp,
envp = const_cast<const char **>(*_NSGetEnviron());
#endif
- pid_t PID;
+ // Explicitly initialized to prevent what appears to be a valgrind false
+ // positive.
+ pid_t PID = 0;
int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
const_cast<char **>(args), const_cast<char **>(envp));
-
+
posix_spawn_file_actions_destroy(&FileActions);
if (Err)
return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
-
+
Data_ = reinterpret_cast<void*>(PID);
return true;
}
#endif
-
- if (!path.canExecute()) {
- if (ErrMsg)
- *ErrMsg = path.str() + " is not executable";
- return false;
- }
// Create a child process.
int child = fork();
@@ -295,7 +292,8 @@ Program::Execute(const Path &path, const char **args, const char **envp,
}
int
-Program::Wait(unsigned secondsToWait,
+Program::Wait(const sys::Path &path,
+ unsigned secondsToWait,
std::string* ErrMsg)
{
#ifdef HAVE_SYS_WAIT_H
@@ -348,22 +346,46 @@ Program::Wait(unsigned secondsToWait,
sigaction(SIGALRM, &Old, 0);
}
- // Return the proper exit status. 0=success, >0 is programs' exit status,
- // <0 means a signal was returned, -9999999 means the program dumped core.
+ // Return the proper exit status. Detect error conditions
+ // so we can return -1 for them and set ErrMsg informatively.
int result = 0;
- if (WIFEXITED(status))
+ if (WIFEXITED(status)) {
result = WEXITSTATUS(status);
- else if (WIFSIGNALED(status))
- result = 0 - WTERMSIG(status);
+#ifdef HAVE_POSIX_SPAWN
+ // The posix_spawn child process returns 127 on any kind of error.
+ // Following the POSIX convention for command-line tools (which posix_spawn
+ // itself apparently does not), check to see if the failure was due to some
+ // reason other than the file not existing, and return 126 in this case.
+ bool Exists;
+ if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+ result = 126;
+#endif
+ if (result == 127) {
+ if (ErrMsg)
+ *ErrMsg = llvm::sys::StrError(ENOENT);
+ return -1;
+ }
+ if (result == 126) {
+ if (ErrMsg)
+ *ErrMsg = "Program could not be executed";
+ return -1;
+ }
+ } else if (WIFSIGNALED(status)) {
+ if (ErrMsg) {
+ *ErrMsg = strsignal(WTERMSIG(status));
#ifdef WCOREDUMP
- else if (WCOREDUMP(status))
- result |= 0x01000000;
+ if (WCOREDUMP(status))
+ *ErrMsg += " (core dumped)";
#endif
+ }
+ return -1;
+ }
return result;
#else
- return -99;
+ if (ErrMsg)
+ *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+ return -1;
#endif
-
}
bool
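Caller-side sketch of the revised Wait() contract above: -1 now means an error (with ErrMsg filled in) rather than encoding signals as negative exit codes, and anything >= 0 is the child's real exit status. The helper below is hypothetical and assumes the program was already launched with Execute().

#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

int ReportChildStatus(sys::Program &Prg, const sys::Path &ProgPath) {
  std::string Err;
  int RC = Prg.Wait(ProgPath, /*secondsToWait=*/0, &Err);
  if (RC == -1)
    errs() << ProgPath.str() << ": " << Err << "\n";  // ENOENT text, signal
                                                      // name, 126/127 cases
  return RC;
}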
diff --git a/contrib/llvm/lib/Support/Unix/README.txt b/contrib/llvm/lib/Support/Unix/README.txt
new file mode 100644
index 0000000..3d547c2
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+============================
+
+This directory provides implementations of the lib/Support classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix - only code that is truly generic to all UNIX platforms
+ Posix - code that is specific to Posix variants of UNIX
+ SUS - code that is specific to the Single Unix Specification
+ SysV - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/contrib/llvm/lib/System/Unix/RWMutex.inc b/contrib/llvm/lib/Support/Unix/RWMutex.inc
index e83d41e..40e87ff 100644
--- a/contrib/llvm/lib/System/Unix/RWMutex.inc
+++ b/contrib/llvm/lib/Support/Unix/RWMutex.inc
@@ -1,10 +1,10 @@
-//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
-//
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific (non-pthread) RWMutex class.
diff --git a/contrib/llvm/lib/System/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index 7b7c43e..0a61759 100644
--- a/contrib/llvm/lib/System/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -1,10 +1,10 @@
//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
@@ -14,7 +14,7 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <vector>
#include <algorithm>
#if HAVE_EXECINFO_H
@@ -28,7 +28,7 @@
#endif
#if HAVE_DLFCN_H && __GNUG__
#include <dlfcn.h>
-#include <cxxabi.h>
+#include <cxxabi.h>
#endif
using namespace llvm;
@@ -82,11 +82,11 @@ static void RegisterHandler(int Signal) {
"Out of space for signal handlers!");
struct sigaction NewHandler;
-
+
NewHandler.sa_handler = SignalHandler;
NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
- sigemptyset(&NewHandler.sa_mask);
-
+ sigemptyset(&NewHandler.sa_mask);
+
// Install the new handler, save the old one in RegisteredSignalInfo.
sigaction(Signal, &NewHandler,
&RegisteredSignalInfo[NumRegisteredSignals].SA);
@@ -144,7 +144,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
IF(); // run the interrupt function.
return;
}
-
+
SignalsMutex.release();
raise(Sig); // Execute the default handler.
return;
@@ -205,7 +205,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
// trace so that the user has an indication of why and where we died.
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
-// doesn't demangle symbols.
+// doesn't demangle symbols.
static void PrintStackTrace(void *) {
#ifdef HAVE_BACKTRACE
static void* StackTrace[256];
@@ -274,6 +274,10 @@ void llvm::sys::PrintStackTraceOnErrorSignal() {
#ifdef __APPLE__
+int raise(int sig) {
+ return pthread_kill(pthread_self(), sig);
+}
+
void __assert_rtn(const char *func,
const char *file,
int line,
@@ -291,7 +295,7 @@ void __assert_rtn(const char *func,
#include <pthread.h>
void abort() {
- pthread_kill(pthread_self(), SIGABRT);
+ raise(SIGABRT);
usleep(1000);
__builtin_trap();
}
diff --git a/contrib/llvm/lib/System/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
index 6769520..2b4c901 100644
--- a/contrib/llvm/lib/System/Unix/ThreadLocal.inc
+++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc
@@ -1,10 +1,10 @@
-//=== llvm/System/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
-//
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific (non-pthread) ThreadLocal class.
diff --git a/contrib/llvm/lib/System/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
index d8cc8f5..5cf5a9d 100644
--- a/contrib/llvm/lib/System/Unix/TimeValue.inc
+++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc
@@ -1,10 +1,10 @@
//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Unix specific portion of the TimeValue class.
@@ -26,7 +26,7 @@ std::string TimeValue::str() const {
time_t ourTime = time_t(this->toEpochTime());
#ifdef __hpux
-// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
asctime_r(localtime(&ourTime), buffer);
#else
::asctime_r(::localtime(&ourTime), buffer);
@@ -43,13 +43,13 @@ TimeValue TimeValue::now() {
// This is *really* unlikely to occur because the only gettimeofday
// errors concern the timezone parameter which we're passing in as 0.
// In the unlikely case it does happen, just return MinTime, no error
- // message needed.
+ // message needed.
return MinTime;
}
return TimeValue(
- static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
- static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+ static_cast<TimeValue::SecondsType>( the_time.tv_sec + PosixZeroTime.seconds_ ),
+ static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
NANOSECONDS_PER_MICROSECOND ) );
}
diff --git a/contrib/llvm/lib/System/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
index c15866f..b7be311 100644
--- a/contrib/llvm/lib/System/Unix/Unix.h
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -1,4 +1,4 @@
-//===- llvm/System/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,7 +20,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h" // Get autoconf configuration settings
-#include "llvm/System/Errno.h"
+#include "llvm/Support/Errno.h"
#include <cstdlib>
#include <cstdio>
#include <cstring>
diff --git a/contrib/llvm/lib/Support/Unix/system_error.inc b/contrib/llvm/lib/Support/Unix/system_error.inc
new file mode 100644
index 0000000..681e919
--- /dev/null
+++ b/contrib/llvm/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return error_condition(ev, system_category());
+#endif // ELAST
+ return error_condition(ev, generic_category());
+}
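A usage sketch of why default_error_condition() maps plain errno values into generic_category(): callers can compare codes built from system_category() against portable errc values, exactly as the PathV2 code earlier in this change does. The helper name is made up.

#include "llvm/Support/system_error.h"
#include <cerrno>
using namespace llvm;

bool IsMissingFile(int SavedErrno) {
  error_code ec(SavedErrno, system_category());
  return ec == errc::no_such_file_or_directory;  // true when SavedErrno == ENOENT
}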
diff --git a/contrib/llvm/lib/System/Valgrind.cpp b/contrib/llvm/lib/Support/Valgrind.cpp
index c76cfe4..7034485 100644
--- a/contrib/llvm/lib/System/Valgrind.cpp
+++ b/contrib/llvm/lib/Support/Valgrind.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Valgrind.h"
#include "llvm/Config/config.h"
#if HAVE_VALGRIND_VALGRIND_H
diff --git a/contrib/llvm/lib/System/Win32/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
index c9a89e5..2c14366 100644
--- a/contrib/llvm/lib/System/Win32/DynamicLibrary.inc
+++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc
@@ -1,17 +1,17 @@
//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 specific implementation of DynamicLibrary.
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#ifdef __MINGW32__
#include <imagehlp.h>
@@ -35,7 +35,7 @@ namespace llvm {
using namespace sys;
//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code
+//=== WARNING: Implementation here must contain only Win32 specific code
//=== and must not be UNIX code.
//===----------------------------------------------------------------------===//
@@ -50,12 +50,22 @@ static std::vector<HMODULE> OpenedHandles;
extern "C" {
// Use old callback if:
// - Not using Visual Studio
-// - Visual Studio 2005 or earlier but only if we are not using the Windows SDK
+// - Visual Studio 2005 or earlier but only if we are not using the Windows SDK
// or Windows SDK version is older than 6.0
// Use new callback if:
// - Newer Visual Studio (comes with newer SDK).
// - Visual Studio 2005 with Windows SDK 6.0+
-#if !defined(_MSC_VER) || _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+#if defined(_MSC_VER)
+ #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+ #define OLD_ELM_CALLBACK_DECL 1
+ #endif
+#elif defined(__MINGW64__)
+ // Use new callback.
+#elif defined(__MINGW32__)
+ #define OLD_ELM_CALLBACK_DECL 1
+#endif
+
+#ifdef OLD_ELM_CALLBACK_DECL
static BOOL CALLBACK ELM_Callback(PSTR ModuleName,
ModuleBaseType ModuleBase,
ULONG ModuleSize,
@@ -89,7 +99,7 @@ extern "C" {
}
bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
- std::string *ErrMsg) {
+ std::string *ErrMsg) {
if (filename) {
HMODULE a_handle = LoadLibrary(filename);
@@ -110,40 +120,19 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
// Stack probing routines are in the support library (e.g. libgcc), but we don't
// have dynamic linking on windows. Provide a hook.
-#if defined(__MINGW32__) || defined (_MSC_VER)
- #define EXPLICIT_SYMBOL(SYM) \
- if (!strcmp(symbolName, #SYM)) return (void*)&SYM
- #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
- if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO
- #define EXPLICIT_SYMBOL_DEF(SYM) \
- extern "C" { extern void *SYM; }
-
- #if defined(__MINGW32__)
- EXPLICIT_SYMBOL_DEF(_alloca)
- EXPLICIT_SYMBOL_DEF(__main)
- EXPLICIT_SYMBOL_DEF(__ashldi3)
- EXPLICIT_SYMBOL_DEF(__ashrdi3)
- EXPLICIT_SYMBOL_DEF(__cmpdi2)
- EXPLICIT_SYMBOL_DEF(__divdi3)
- EXPLICIT_SYMBOL_DEF(__fixdfdi)
- EXPLICIT_SYMBOL_DEF(__fixsfdi)
- EXPLICIT_SYMBOL_DEF(__fixunsdfdi)
- EXPLICIT_SYMBOL_DEF(__fixunssfdi)
- EXPLICIT_SYMBOL_DEF(__floatdidf)
- EXPLICIT_SYMBOL_DEF(__floatdisf)
- EXPLICIT_SYMBOL_DEF(__lshrdi3)
- EXPLICIT_SYMBOL_DEF(__moddi3)
- EXPLICIT_SYMBOL_DEF(__udivdi3)
- EXPLICIT_SYMBOL_DEF(__umoddi3)
- #elif defined(_MSC_VER)
- EXPLICIT_SYMBOL_DEF(_alloca_probe)
- #endif
-#endif
+#define EXPLICIT_SYMBOL(SYM) \
+ extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
// First check symbols added via AddSymbol().
if (ExplicitSymbols) {
- std::map<std::string, void *>::iterator I =
+ std::map<std::string, void *>::iterator I =
ExplicitSymbols->find(symbolName);
std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
if (I != E)
@@ -159,42 +148,19 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
}
}
-#if defined(__MINGW32__)
- {
- EXPLICIT_SYMBOL(_alloca);
- EXPLICIT_SYMBOL(__main);
- EXPLICIT_SYMBOL(__ashldi3);
- EXPLICIT_SYMBOL(__ashrdi3);
- EXPLICIT_SYMBOL(__cmpdi2);
- EXPLICIT_SYMBOL(__divdi3);
- EXPLICIT_SYMBOL(__fixdfdi);
- EXPLICIT_SYMBOL(__fixsfdi);
- EXPLICIT_SYMBOL(__fixunsdfdi);
- EXPLICIT_SYMBOL(__fixunssfdi);
- EXPLICIT_SYMBOL(__floatdidf);
- EXPLICIT_SYMBOL(__floatdisf);
- EXPLICIT_SYMBOL(__lshrdi3);
- EXPLICIT_SYMBOL(__moddi3);
- EXPLICIT_SYMBOL(__udivdi3);
- EXPLICIT_SYMBOL(__umoddi3);
-
- EXPLICIT_SYMBOL2(alloca, _alloca);
-#undef EXPLICIT_SYMBOL
-#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
- }
-#elif defined(_MSC_VER)
+ #define EXPLICIT_SYMBOL(SYM) \
+ if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+ #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \
+ if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
{
- EXPLICIT_SYMBOL2(alloca, _alloca_probe);
- EXPLICIT_SYMBOL2(_alloca, _alloca_probe);
-#undef EXPLICIT_SYMBOL
-#undef EXPLICIT_SYMBOL2
-#undef EXPLICIT_SYMBOL_DEF
+ #include "explicit_symbols.inc"
}
-#endif
+
+ #undef EXPLICIT_SYMBOL
+ #undef EXPLICIT_SYMBOL2
return 0;
}
}
-
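The exact contents of explicit_symbols.inc are not shown in this diff, so the following is a guess at the pattern rather than the real file: one symbol list expanded twice, first into extern declarations at file scope and then into compare-and-return statements inside the lookup routine. All names here are stand-ins.

#include <cstring>

extern "C" { void *demo_alloca = 0; }       // stand-in for a real runtime symbol

#define EXPLICIT_SYMBOL(SYM) extern "C" { extern void *SYM; }
EXPLICIT_SYMBOL(demo_alloca)                // first expansion: declarations
#undef EXPLICIT_SYMBOL

void *LookupSketch(const char *symbolName) {
#define EXPLICIT_SYMBOL(SYM) \
  if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
  EXPLICIT_SYMBOL(demo_alloca)              // second expansion: lookup table
#undef EXPLICIT_SYMBOL
  return 0;
}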
diff --git a/contrib/llvm/lib/System/Win32/Host.inc b/contrib/llvm/lib/Support/Windows/Host.inc
index 18f00f8..733830e 100644
--- a/contrib/llvm/lib/System/Win32/Host.inc
+++ b/contrib/llvm/lib/Support/Windows/Host.inc
@@ -1,4 +1,4 @@
-//===- llvm/System/Win32/Host.inc -------------------------------*- C++ -*-===//
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <cstdio>
#include <string>
diff --git a/contrib/llvm/lib/System/Win32/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc
index 19fccbd..9f69e73 100644
--- a/contrib/llvm/lib/System/Win32/Memory.inc
+++ b/contrib/llvm/lib/Support/Windows/Memory.inc
@@ -1,10 +1,10 @@
//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 specific implementation of various Memory
@@ -12,15 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
-#include "llvm/System/DataTypes.h"
-#include "llvm/System/Process.h"
+#include "Windows.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
namespace llvm {
using namespace sys;
//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code
+//=== WARNING: Implementation here must contain only Win32 specific code
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/System/Win32/Mutex.inc b/contrib/llvm/lib/Support/Windows/Mutex.inc
index 75f01fe..583dc63 100644
--- a/contrib/llvm/lib/System/Win32/Mutex.inc
+++ b/contrib/llvm/lib/Support/Windows/Mutex.inc
@@ -1,10 +1,10 @@
-//===- llvm/System/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
-//
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Win32 specific (non-pthread) Mutex class.
@@ -16,8 +16,8 @@
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
-#include "Win32.h"
-#include "llvm/System/Mutex.h"
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
namespace llvm {
using namespace sys;
@@ -35,21 +35,21 @@ MutexImpl::~MutexImpl()
data_ = 0;
}
-bool
+bool
MutexImpl::acquire()
{
EnterCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
-bool
+bool
MutexImpl::release()
{
LeaveCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
-bool
+bool
MutexImpl::tryacquire()
{
return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
diff --git a/contrib/llvm/lib/System/Win32/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index 4a6dbd3..625f67a 100644
--- a/contrib/llvm/lib/System/Win32/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -1,13 +1,10 @@
-//===- llvm/System/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-// Modified by Henrik Bach to comply with at least MinGW.
-// Ported to Win32 by Jeff Cohen.
-//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 specific implementation of the Path class.
@@ -19,7 +16,7 @@
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <malloc.h>
#include <cstdio>
@@ -45,8 +42,13 @@ static void FlipBackSlashes(std::string& s) {
namespace llvm {
namespace sys {
+
const char PathSeparator = ';';
+StringRef Path::GetEXESuffix() {
+ return "exe";
+}
+
Path::Path(llvm::StringRef p)
: path(p) {
FlipBackSlashes(path);
@@ -64,6 +66,18 @@ Path::operator=(StringRef that) {
return *this;
}
+// push_back 0 on create, and pop_back on delete.
+struct ScopedNullTerminator {
+ std::string &str;
+ ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
+ ~ScopedNullTerminator() {
+ // str.pop_back(); But wait, C++03 doesn't have this...
+ assert(!str.empty() && str[str.size() - 1] == 0
+ && "Null char not present!");
+ str.resize(str.size() - 1);
+ }
+};
+
bool
Path::isValid() const {
if (path.empty())
@@ -72,6 +86,8 @@ Path::isValid() const {
// If there is a colon, it must be the second character, preceded by a letter
// and followed by something.
size_t len = path.size();
+ // This code assumes that path is null terminated, so make sure it is.
+ ScopedNullTerminator snt(path);
size_t pos = path.rfind(':',len);
size_t rootslash = 0;
if (pos != std::string::npos) {
@@ -156,8 +172,9 @@ Path::isAbsolute(const char *NameStart, unsigned NameLen) {
case 2:
return NameStart[0] == '/';
default:
- return (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
- (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+ return
+ (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+ (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
}
}
@@ -216,15 +233,39 @@ Path::GetTemporaryDirectory(std::string* ErrMsg) {
// FIXME: the following set of functions don't map to Windows very well.
Path
Path::GetRootDirectory() {
- Path result;
- result.set("C:/");
- return result;
+  // This is the only notion that Windows has of a root directory. Nothing
+ // is here except for drives.
+ return Path("file:///");
}
void
Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
- Paths.push_back(sys::Path("C:/WINDOWS/SYSTEM32"));
- Paths.push_back(sys::Path("C:/WINDOWS"));
+ char buff[MAX_PATH];
+ // Generic form of C:\Windows\System32
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get system directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
+
+ // Reset buff.
+ buff[0] = 0;
+ // Generic form of C:\Windows
+ res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK) {
+ assert(0 && "Failed to get windows directory");
+ return;
+ }
+ Paths.push_back(sys::Path(buff));
}
void
@@ -246,20 +287,23 @@ Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
Path
Path::GetLLVMDefaultConfigDir() {
- // TODO: this isn't going to fly on Windows
- return Path("/etc/llvm");
+ Path ret = GetUserHomeDirectory();
+ if (!ret.appendComponent(".llvm"))
+ assert(0 && "Failed to append .llvm");
+ return ret;
}
Path
Path::GetUserHomeDirectory() {
- // TODO: Typical Windows setup doesn't define HOME.
- const char* home = getenv("HOME");
- if (home) {
- Path result;
- if (result.set(home))
- return result;
- }
- return GetRootDirectory();
+ char buff[MAX_PATH];
+ HRESULT res = SHGetFolderPathA(NULL,
+ CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+ NULL,
+ SHGFP_TYPE_CURRENT,
+ buff);
+ if (res != S_OK)
+ assert(0 && "Failed to get user home directory");
+ return Path(buff);
}
Path
@@ -331,6 +375,19 @@ Path::isDirectory() const {
}
bool
+Path::isSymLink() const {
+ DWORD attributes = GetFileAttributes(path.c_str());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES)
+ // There's no sane way to report this :(.
+ assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+ // This isn't exactly what defines a NTFS symlink, but it is only true for
+ // paths that act like a symlink.
+ return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
Path::canRead() const {
// FIXME: take security attributes into account.
DWORD attr = GetFileAttributes(path.c_str());
@@ -353,9 +410,10 @@ Path::canExecute() const {
bool
Path::isRegularFile() const {
- if (isDirectory())
+ bool res;
+ if (fs::is_regular_file(path, res))
return false;
- return true;
+ return res;
}
StringRef
@@ -532,18 +590,6 @@ Path::eraseComponent() {
}
bool
-Path::appendSuffix(StringRef suffix) {
- std::string save(path);
- path.append(".");
- path.append(suffix);
- if (!isValid()) {
- path = save;
- return false;
- }
- return true;
-}
-
-bool
Path::eraseSuffix() {
size_t dotpos = path.rfind('.',path.size());
size_t slashpos = path.rfind('/',path.size());
@@ -622,7 +668,8 @@ Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
pathname[len-1] = 0;
if (!CreateDirectory(pathname, NULL) &&
GetLastError() != ERROR_ALREADY_EXISTS) {
- return MakeErrMsg(ErrMsg, std::string(pathname) + ": Can't create directory: ");
+ return MakeErrMsg(ErrMsg, std::string(pathname) +
+ ": Can't create directory: ");
}
}
return false;
@@ -648,7 +695,8 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
// If it doesn't exist, we're done.
- if (!exists())
+ bool Exists;
+ if (fs::exists(path, Exists) || !Exists)
return false;
char *pathname = reinterpret_cast<char *>(_alloca(path.length()+3));
@@ -822,7 +870,8 @@ CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) {
bool
Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
- if (reuse_current && !exists())
+ bool Exists;
+ if (reuse_current && (fs::exists(path, Exists) || !Exists))
return false; // File doesn't exist already, just use it!
// Reserve space for -XXXXXX at the end.
@@ -839,7 +888,7 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
if (++FCounter > 999999)
FCounter = 0;
path = FNBuffer;
- } while (exists());
+ } while (!fs::exists(path, Exists) && Exists);
return false;
}
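The ScopedNullTerminator introduced earlier in this Path.inc diff temporarily appends a real '\0' character to the string so that code such as isValid() can read one position past the logical end without going out of bounds; the destructor resizes the string back because C++03 has no std::string::pop_back(). A minimal, self-contained sketch of the idiom (the caller and names below are hypothetical, not part of the patch):

#include <cctype>
#include <string>

// Stand-alone copy of the idiom, for illustration only.
struct ScopedNullTerminator {
  std::string &str;
  ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
  ~ScopedNullTerminator() { str.resize(str.size() - 1); } // drop the 0 again
};

// Looks for a leading "X:" drive spec. Even when p has a single character,
// p[1] is the appended '\0', so the read is well-defined string content.
bool looksLikeDriveSpec(std::string &p) {
  if (p.empty()) return false;
  ScopedNullTerminator snt(p);
  return std::isalpha(static_cast<unsigned char>(p[0])) && p[1] == ':';
}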
diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc
new file mode 100644
index 0000000..8effb0c
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,750 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <wincrypt.h>
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+ typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+ /*__in*/ LPCWSTR lpSymlinkFileName,
+ /*__in*/ LPCWSTR lpTargetFileName,
+ /*__in*/ DWORD dwFlags);
+
+ PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+ ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+ "CreateSymbolicLinkW"));
+
+ error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+ int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), 0);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf16.reserve(len + 1);
+ utf16.set_size(len);
+
+ len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+ utf8.begin(), utf8.size(),
+ utf16.begin(), utf16.size());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf16 null terminated.
+ utf16.push_back(0);
+ utf16.pop_back();
+
+ return success;
+ }
+
+ error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8) {
+ // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.begin(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ utf8.reserve(len);
+ utf8.set_size(len);
+
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ utf16, utf16_len,
+ utf8.data(), utf8.size(),
+ NULL, NULL);
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // Make utf8 null terminated.
+ utf8.push_back(0);
+ utf8.pop_back();
+
+ return success;
+ }
+
+ error_code TempDir(SmallVectorImpl<wchar_t> &result) {
+ retry_temp_dir:
+ DWORD len = ::GetTempPathW(result.capacity(), result.begin());
+
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ if (len > result.capacity()) {
+ result.reserve(len);
+ goto retry_temp_dir;
+ }
+
+ result.set_size(len);
+ return success;
+ }
+
+ // Forwarder for ScopedHandle.
+ BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
+ return ::CryptReleaseContext(Provider, 0);
+ }
+
+ typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
+ BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
+ ScopedCryptContext;
+ bool is_separator(const wchar_t value) {
+ switch (value) {
+ case L'\\':
+ case L'/':
+ return true;
+ default:
+ return false;
+ }
+ }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+ SmallVector<wchar_t, 128> cur_path;
+ cur_path.reserve(128);
+retry_cur_dir:
+ DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data());
+
+ // A zero return value indicates a failure other than insufficient space.
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ // If there's insufficient space, the len returned is larger than the len
+ // given.
+ if (len > cur_path.capacity()) {
+ cur_path.reserve(len);
+ goto retry_cur_dir;
+ }
+
+ cur_path.set_size(len);
+ // cur_path now holds the current directory in utf-16. Convert to utf-8.
+
+ // Find out how much space we need. Sadly, this function doesn't return the
+ // size needed unless you tell it the result size is 0, which means you
+ // _always_ have to call it twice.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), 0,
+ NULL, NULL);
+
+ if (len == 0)
+ return make_error_code(windows_error(::GetLastError()));
+
+ result.reserve(len);
+ result.set_size(len);
+ // Now do the actual conversion.
+ len = ::WideCharToMultiByte(CP_UTF8, 0,
+ cur_path.data(), cur_path.size(),
+ result.data(), result.size(),
+ NULL, NULL);
+ if (len == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ // Copy the file.
+ BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(),
+ copt != copy_option::overwrite_if_exists);
+
+ if (res == 0)
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::already_exists)
+ existed = true;
+ else
+ return ec;
+ } else
+ existed = false;
+
+ return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+ // Only do it if the function is available at runtime.
+ if (!create_symbolic_link_api)
+ return make_error_code(errc::function_not_supported);
+
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code remove(const Twine &path, bool &existed) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ file_status st;
+ if (error_code ec = status(path, st))
+ return ec;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ if (st.type() == file_type::directory_file) {
+ if (!::RemoveDirectoryW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ } else {
+ if (!::DeleteFileW(c_str(path_utf16))) {
+ error_code ec = windows_error(::GetLastError());
+ if (ec != windows_error::file_not_found)
+ return ec;
+ existed = false;
+ } else
+ existed = true;
+ }
+
+ return success;
+}
+
+error_code rename(const Twine &from, const Twine &to) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toStringRef(from_storage);
+ StringRef t = to.toStringRef(to_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_from;
+ SmallVector<wchar_t, 128> wide_to;
+ if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
+ if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+
+ if (!::MoveFileExW(wide_from.begin(), wide_to.begin(),
+ MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
+ return windows_error(::GetLastError());
+
+ return success;
+}
+
+error_code resize_file(const Twine &path, uint64_t size) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+ if (fd == -1)
+ return error_code(errno, generic_category());
+#ifdef HAVE__CHSIZE_S
+ errno_t error = ::_chsize_s(fd, size);
+#else
+ errno_t error = ::_chsize(fd, size);
+#endif
+ ::close(fd);
+ return error_code(error, generic_category());
+}
+
+error_code exists(const Twine &path, bool &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
+
+ if (attributes == INVALID_FILE_ATTRIBUTES) {
+ // See if the file didn't actually exist.
+ error_code ec = make_error_code(windows_error(::GetLastError()));
+ if (ec != windows_error::file_not_found &&
+ ec != windows_error::path_not_found)
+ return ec;
+ result = false;
+ } else
+ result = true;
+ return success;
+}
+
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ // Get arguments.
+ SmallString<128> a_storage;
+ SmallString<128> b_storage;
+ StringRef a = A.toStringRef(a_storage);
+ StringRef b = B.toStringRef(b_storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_a;
+ SmallVector<wchar_t, 128> wide_b;
+ if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
+ if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
+
+ AutoHandle HandleB(
+ ::CreateFileW(wide_b.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ AutoHandle HandleA(
+ ::CreateFileW(wide_a.begin(),
+ 0,
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ 0,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+
+ // If both handles are invalid, it's an error.
+ if (HandleA == INVALID_HANDLE_VALUE &&
+ HandleB == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // If only one is invalid, it's false.
+  if (HandleA == INVALID_HANDLE_VALUE ||
+      HandleB == INVALID_HANDLE_VALUE) {
+ result = false;
+ return success;
+ }
+
+ // Get file information.
+ BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
+ if (!::GetFileInformationByHandle(HandleA, &InfoA))
+ return windows_error(::GetLastError());
+ if (!::GetFileInformationByHandle(HandleB, &InfoB))
+ return windows_error(::GetLastError());
+
+ // See if it's all the same.
+ result =
+ InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
+ InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
+ InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
+ InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
+ InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
+ InfoA.ftLastWriteTime.dwLowDateTime ==
+ InfoB.ftLastWriteTime.dwLowDateTime &&
+ InfoA.ftLastWriteTime.dwHighDateTime ==
+ InfoB.ftLastWriteTime.dwHighDateTime;
+
+ return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ WIN32_FILE_ATTRIBUTE_DATA FileData;
+ if (!::GetFileAttributesExW(path_utf16.begin(),
+ ::GetFileExInfoStandard,
+ &FileData))
+ return windows_error(::GetLastError());
+
+ result =
+ (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ + FileData.nFileSizeLow;
+
+ return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ DWORD attr = ::GetFileAttributesW(path_utf16.begin());
+ if (attr == INVALID_FILE_ATTRIBUTES)
+ goto handle_status_error;
+
+ // Handle reparse points.
+ if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
+ AutoHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (h == INVALID_HANDLE_VALUE)
+ goto handle_status_error;
+ }
+
+ if (attr & FILE_ATTRIBUTE_DIRECTORY)
+ result = file_status(file_type::directory_file);
+ else
+ result = file_status(file_type::regular_file);
+
+ return success;
+
+handle_status_error:
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_not_found ||
+ ec == windows_error::path_not_found)
+ result = file_status(file_type::file_not_found);
+ else if (ec == windows_error::sharing_violation)
+ result = file_status(file_type::type_unknown);
+ else {
+ result = file_status(file_type::status_error);
+ return ec;
+ }
+
+ return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+ SmallVectorImpl<char> &result_path) {
+ // Use result_path as temp storage.
+ result_path.set_size(0);
+ StringRef m = model.toStringRef(result_path);
+
+ SmallVector<wchar_t, 128> model_utf16;
+ if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec;
+
+ // Make model absolute by prepending a temp directory if it's not already.
+ bool absolute = path::is_absolute(m);
+
+ if (!absolute) {
+ SmallVector<wchar_t, 64> temp_dir;
+ if (error_code ec = TempDir(temp_dir)) return ec;
+ // Handle c: by removing it.
+ if (model_utf16.size() > 2 && model_utf16[1] == L':') {
+ model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2);
+ }
+ model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end());
+ }
+
+ // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+ // needed if the randomly chosen path already exists.
+ SmallVector<wchar_t, 128> random_path_utf16;
+
+ // Get a Crypto Provider for CryptGenRandom.
+ HCRYPTPROV HCPC;
+ if (!::CryptAcquireContextW(&HCPC,
+ NULL,
+ NULL,
+ PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT))
+ return windows_error(::GetLastError());
+ ScopedCryptContext CryptoProvider(HCPC);
+
+retry_random_path:
+ random_path_utf16.set_size(0);
+ for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(),
+ e = model_utf16.end();
+ i != e; ++i) {
+ if (*i == L'%') {
+ BYTE val = 0;
+ if (!::CryptGenRandom(CryptoProvider, 1, &val))
+ return windows_error(::GetLastError());
+ random_path_utf16.push_back("0123456789abcdef"[val & 15]);
+ }
+ else
+ random_path_utf16.push_back(*i);
+ }
+ // Make random_path_utf16 null terminated.
+ random_path_utf16.push_back(0);
+ random_path_utf16.pop_back();
+
+ // Try to create + open the path.
+retry_create_file:
+ HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(),
+ GENERIC_READ | GENERIC_WRITE,
+ FILE_SHARE_READ,
+ NULL,
+ // Return ERROR_FILE_EXISTS if the file
+ // already exists.
+ CREATE_NEW,
+ FILE_ATTRIBUTE_TEMPORARY,
+ NULL);
+ if (TempFileHandle == INVALID_HANDLE_VALUE) {
+ // If the file existed, try again, otherwise, error.
+ error_code ec = windows_error(::GetLastError());
+ if (ec == windows_error::file_exists)
+ goto retry_random_path;
+ // Check for non-existing parent directories.
+ if (ec == windows_error::path_not_found) {
+ // Create the directories using result_path as temp storage.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path))
+ return ec;
+ StringRef p(result_path.begin(), result_path.size());
+ SmallString<64> dir_to_create;
+ for (path::const_iterator i = path::begin(p),
+ e = --path::end(p); i != e; ++i) {
+ path::append(dir_to_create, *i);
+ bool Exists;
+ if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+ if (!Exists) {
+ // If c: doesn't exist, bail.
+ if (i->endswith(":"))
+ return ec;
+
+ SmallVector<wchar_t, 64> dir_to_create_utf16;
+ if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16))
+ return ec;
+
+ // Create the directory.
+ if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL))
+ return windows_error(::GetLastError());
+ }
+ }
+ goto retry_create_file;
+ }
+ return ec;
+ }
+
+ // Set result_path to the utf-8 representation of the path.
+ if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(),
+ random_path_utf16.size(), result_path)) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ return ec;
+ }
+
+ // Convert the Windows API file handle into a C-runtime handle.
+ int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0);
+ if (fd == -1) {
+ ::CloseHandle(TempFileHandle);
+ ::DeleteFileW(random_path_utf16.begin());
+ // MSDN doesn't say anything about _open_osfhandle setting errno or
+ // GetLastError(), so just return invalid_handle.
+ return windows_error::invalid_handle;
+ }
+
+ result_fd = fd;
+ return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+ SmallVectorImpl<char> &result) {
+ SmallString<128> path_storage;
+ SmallVector<wchar_t, 128> path_utf16;
+ result.set_size(0);
+
+ // Convert path to UTF-16.
+ if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
+ path_utf16))
+ return ec;
+
+ // Open file.
+ HANDLE file = ::CreateFileW(c_str(path_utf16),
+ GENERIC_READ,
+ FILE_SHARE_READ,
+ NULL,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_READONLY,
+ NULL);
+ if (file == INVALID_HANDLE_VALUE)
+ return windows_error(::GetLastError());
+
+ // Allocate buffer.
+ result.reserve(len);
+
+ // Get magic!
+ DWORD bytes_read = 0;
+ BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
+ error_code ec = windows_error(::GetLastError());
+ ::CloseHandle(file);
+ if (!read_success || (bytes_read != len)) {
+ // Set result size to the number of bytes read if it's valid.
+ if (bytes_read >= 0 && bytes_read <= len)
+ result.set_size(bytes_read);
+ // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
+ return ec;
+ }
+
+ result.set_size(len);
+ return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+ SmallVector<wchar_t, 128> path_utf16;
+
+ if (error_code ec = UTF8ToUTF16(path,
+ path_utf16))
+ return ec;
+
+ // Convert path to the format that Windows is happy with.
+ if (path_utf16.size() > 0 &&
+      !is_separator(path_utf16[path_utf16.size() - 1]) &&
+      path_utf16[path_utf16.size() - 1] != L':') {
+ path_utf16.push_back(L'\\');
+ path_utf16.push_back(L'*');
+ } else {
+ path_utf16.push_back(L'*');
+ }
+
+ // Get the first directory entry.
+ WIN32_FIND_DATAW FirstFind;
+ ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind));
+ if (!FindHandle)
+ return windows_error(::GetLastError());
+
+ size_t FilenameLen = ::wcslen(FirstFind.cFileName);
+ while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
+ FirstFind.cFileName[1] == L'.'))
+ if (!::FindNextFileW(FindHandle, &FirstFind)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ } else
+ FilenameLen = ::wcslen(FirstFind.cFileName);
+
+ // Construct the current directory entry.
+ SmallString<128> directory_entry_name_utf8;
+ if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
+ ::wcslen(FirstFind.cFileName),
+ directory_entry_name_utf8))
+ return ec;
+
+ it.IterationHandle = intptr_t(FindHandle.take());
+ SmallString<128> directory_entry_path(path);
+ path::append(directory_entry_path, directory_entry_name_utf8.str());
+ it.CurrentEntry = directory_entry(directory_entry_path.str());
+
+ return success;
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+ if (it.IterationHandle != 0)
+ // Closes the handle if it's valid.
+ ScopedFindHandle close(HANDLE(it.IterationHandle));
+ it.IterationHandle = 0;
+ it.CurrentEntry = directory_entry();
+ return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+ WIN32_FIND_DATAW FindData;
+ if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
+ error_code ec = windows_error(::GetLastError());
+ // Check for end.
+ if (ec == windows_error::no_more_files)
+ return directory_iterator_destruct(it);
+ return ec;
+ }
+
+ size_t FilenameLen = ::wcslen(FindData.cFileName);
+ if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') ||
+ (FilenameLen == 2 && FindData.cFileName[0] == L'.' &&
+ FindData.cFileName[1] == L'.'))
+ return directory_iterator_increment(it);
+
+ SmallString<128> directory_entry_path_utf8;
+ if (error_code ec = UTF16ToUTF8(FindData.cFileName,
+ ::wcslen(FindData.cFileName),
+ directory_entry_path_utf8))
+ return ec;
+
+ it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
+ return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
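Every function in this new PathV2.inc follows the same convention: convert the UTF-8 Twine argument to UTF-16, call the wide Win32 API, and report failures as an error_code built from GetLastError(). A minimal caller, sketched under the assumption that the matching declarations live in llvm/Support/PathV2.h and llvm/Support/system_error.h of this import:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/system_error.h"
#include <string>

// Sketch only: create a directory and surface any mapped Windows error as
// the human-readable text provided by the error category.
static bool ensureDirectory(const llvm::Twine &dir, std::string &err) {
  bool existedBefore = false;
  if (llvm::error_code ec = llvm::sys::fs::create_directory(dir, existedBefore)) {
    err = ec.message();
    return false;
  }
  return true; // created now, or existedBefore says it was already there
}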
diff --git a/contrib/llvm/lib/System/Win32/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index feb0806..06a7f00 100644
--- a/contrib/llvm/lib/System/Win32/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -1,17 +1,17 @@
//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 specific implementation of the Process class.
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <psapi.h>
#include <malloc.h>
#include <io.h>
@@ -25,7 +25,7 @@
#endif
//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code
+//=== WARNING: Implementation here must contain only Win32 specific code
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
@@ -51,13 +51,13 @@ inline unsigned GetPageSizeOnce() {
return static_cast<unsigned>(info.dwPageSize);
}
-unsigned
+unsigned
Process::GetPageSize() {
static const unsigned PageSize = GetPageSizeOnce();
return PageSize;
}
-size_t
+size_t
Process::GetMallocUsage()
{
_HEAPINFO hinfo;
@@ -86,7 +86,7 @@ Process::GetTimeUsage(
elapsed = TimeValue::now();
uint64_t ProcCreate, ProcExit, KernelTime, UserTime;
- GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
+ GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate,
(FILETIME*)&ProcExit, (FILETIME*)&KernelTime,
(FILETIME*)&UserTime);
@@ -132,7 +132,8 @@ bool Process::StandardErrIsDisplayed() {
}
bool Process::FileDescriptorIsDisplayed(int fd) {
- return GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_CHAR;
+ DWORD Mode; // Unused
+ return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0);
}
unsigned Process::StandardOutColumns() {
diff --git a/contrib/llvm/lib/System/Win32/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index 16bb28e..350363c 100644
--- a/contrib/llvm/lib/System/Win32/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <cstdio>
#include <malloc.h>
#include <io.h>
@@ -58,10 +58,12 @@ Program::FindProgramByName(const std::string& progName) {
Path temp;
if (!temp.set(progName)) // invalid name
return Path();
- if (temp.canExecute()) // already executable as is
+ // Return paths with slashes verbatim.
+ if (progName.find('\\') != std::string::npos ||
+ progName.find('/') != std::string::npos)
return temp;
- // At this point, the file name is valid and its not executable.
+ // At this point, the file name is valid and does not contain slashes.
// Let Windows search for it.
char buffer[MAX_PATH];
char *dummy = NULL;
@@ -123,19 +125,10 @@ static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) {
return h;
}
-#ifdef __MINGW32__
- // Due to unknown reason, mingw32's w32api doesn't have this declaration.
- extern "C"
- BOOL WINAPI SetInformationJobObject(HANDLE hJob,
- JOBOBJECTINFOCLASS JobObjectInfoClass,
- LPVOID lpJobObjectInfo,
- DWORD cbJobObjectInfoLength);
-#endif
-
/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling
/// CreateProcess.
static bool ArgNeedsQuotes(const char *Str) {
- return Str[0] == '\0' || strchr(Str, ' ') != 0;
+ return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
}
@@ -337,7 +330,8 @@ Program::Execute(const Path& path,
}
int
-Program::Wait(unsigned secondsToWait,
+Program::Wait(const Path &path,
+ unsigned secondsToWait,
std::string* ErrMsg) {
if (Data_ == 0) {
MakeErrMsg(ErrMsg, "Process not started!");
diff --git a/contrib/llvm/lib/System/Win32/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc
index e269226..471f8fa 100644
--- a/contrib/llvm/lib/System/Win32/RWMutex.inc
+++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc
@@ -1,10 +1,10 @@
-//= llvm/System/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
-//
+//= llvm/Support/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Win32 specific (non-pthread) RWMutex class.
@@ -16,10 +16,10 @@
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
-// real reader-writer locks, you a pthreads implementation for Windows.
+// real reader-writer locks, you need a threads implementation for Windows.
namespace llvm {
using namespace sys;
diff --git a/contrib/llvm/lib/System/Win32/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index 2498a26e..14f3f21 100644
--- a/contrib/llvm/lib/System/Win32/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -1,17 +1,17 @@
//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 specific implementation of the Signals class.
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <stdio.h>
#include <vector>
#include <algorithm>
@@ -43,9 +43,7 @@ static std::vector<llvm::sys::Path> *FilesToRemove = NULL;
static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
static bool RegisteredUnhandledExceptionFilter = false;
static bool CleanupExecuted = false;
-#ifdef _MSC_VER
static bool ExitOnUnhandledExceptions = false;
-#endif
static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
// Windows creates a new thread to execute the console handler when an event
@@ -56,7 +54,7 @@ static CRITICAL_SECTION CriticalSection;
namespace llvm {
//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code
+//=== WARNING: Implementation here must contain only Win32 specific code
//=== and must not be UNIX code
//===----------------------------------------------------------------------===//
@@ -110,12 +108,15 @@ static void RegisterHandler() {
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
// Environment variable to disable any kind of crash dialog.
-#ifdef _MSC_VER
if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+#ifdef _MSC_VER
_CrtSetReportHook(CRTReportHook);
+#endif
+ SetErrorMode(SEM_FAILCRITICALERRORS |
+ SEM_NOGPFAULTERRORBOX |
+ SEM_NOOPENFILEERRORBOX);
ExitOnUnhandledExceptions = true;
}
-#endif
// IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
// else multi-threading problems will ensue.
@@ -145,6 +146,8 @@ void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
if (FilesToRemove == NULL)
return;
+ RegisterHandler();
+
FilesToRemove->push_back(Filename);
std::vector<sys::Path>::reverse_iterator I =
std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
@@ -208,97 +211,91 @@ void llvm::sys::RunInterruptHandlers() {
}
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
- try {
- Cleanup();
-
+ Cleanup();
+
#ifdef _WIN64
// TODO: provide a x64 friendly version of the following
#else
-
- // Initialize the STACKFRAME structure.
- STACKFRAME StackFrame;
- memset(&StackFrame, 0, sizeof(StackFrame));
-
- StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
- StackFrame.AddrPC.Mode = AddrModeFlat;
- StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
- StackFrame.AddrStack.Mode = AddrModeFlat;
- StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
- StackFrame.AddrFrame.Mode = AddrModeFlat;
-
- HANDLE hProcess = GetCurrentProcess();
- HANDLE hThread = GetCurrentThread();
-
- // Initialize the symbol handler.
- SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
- SymInitialize(hProcess, NULL, TRUE);
-
- while (true) {
- if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
- ep->ContextRecord, NULL, SymFunctionTableAccess,
- SymGetModuleBase, NULL)) {
- break;
- }
-
- if (StackFrame.AddrFrame.Offset == 0)
- break;
-
- // Print the PC in hexadecimal.
- DWORD PC = StackFrame.AddrPC.Offset;
- fprintf(stderr, "%08lX", PC);
-
- // Print the parameters. Assume there are four.
- fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", StackFrame.Params[0],
- StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
-
- // Verify the PC belongs to a module in this process.
- if (!SymGetModuleBase(hProcess, PC)) {
- fputs(" <unknown module>\n", stderr);
- continue;
- }
-
- // Print the symbol name.
- char buffer[512];
- IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
- memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
- symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
- symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
-
- DWORD dwDisp;
- if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
- fputc('\n', stderr);
- continue;
- }
-
- buffer[511] = 0;
- if (dwDisp > 0)
- fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
- else
- fprintf(stderr, ", %s", symbol->Name);
-
- // Print the source file and line number information.
- IMAGEHLP_LINE line;
- memset(&line, 0, sizeof(line));
- line.SizeOfStruct = sizeof(line);
- if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
- fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
- if (dwDisp > 0)
- fprintf(stderr, "+%04lu byte(s)", dwDisp);
- }
+ // Initialize the STACKFRAME structure.
+ STACKFRAME StackFrame;
+ memset(&StackFrame, 0, sizeof(StackFrame));
+
+ StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+
+ HANDLE hProcess = GetCurrentProcess();
+ HANDLE hThread = GetCurrentThread();
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame,
+ ep->ContextRecord, NULL, SymFunctionTableAccess,
+ SymGetModuleBase, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ // Print the PC in hexadecimal.
+ DWORD PC = StackFrame.AddrPC.Offset;
+ fprintf(stderr, "%08lX", PC);
+
+ // Print the parameters. Assume there are four.
+ fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ StackFrame.Params[0],
+ StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]);
+
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase(hProcess, PC)) {
+ fputs(" <unknown module>\n", stderr);
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL *symbol = reinterpret_cast<IMAGEHLP_SYMBOL *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL);
+
+ DWORD dwDisp;
+ if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) {
fputc('\n', stderr);
+ continue;
}
-#endif
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp);
+ else
+ fprintf(stderr, ", %s", symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE line;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) {
+ fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwDisp > 0)
+ fprintf(stderr, "+%04lu byte(s)", dwDisp);
+ }
- } catch (...) {
- assert(0 && "Crashed in LLVMUnhandledExceptionFilter");
+ fputc('\n', stderr);
}
-#ifdef _MSC_VER
+#endif
+
if (ExitOnUnhandledExceptions)
_exit(-3);
-#endif
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
@@ -329,4 +326,3 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) {
LeaveCriticalSection(&CriticalSection);
return FALSE;
}
-
diff --git a/contrib/llvm/lib/System/Win32/ThreadLocal.inc b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
index b8b933c..512462d 100644
--- a/contrib/llvm/lib/System/Win32/ThreadLocal.inc
+++ b/contrib/llvm/lib/Support/Windows/ThreadLocal.inc
@@ -1,10 +1,10 @@
-//= llvm/System/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
-//
+//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file implements the Win32 specific (non-pthread) ThreadLocal class.
@@ -16,8 +16,8 @@
//=== is guaranteed to work on *all* Win32 variants.
//===----------------------------------------------------------------------===//
-#include "Win32.h"
-#include "llvm/System/ThreadLocal.h"
+#include "Windows.h"
+#include "llvm/Support/ThreadLocal.h"
namespace llvm {
using namespace sys;
@@ -44,6 +44,7 @@ void ThreadLocalImpl::setInstance(const void* d){
DWORD* tls = static_cast<DWORD*>(data);
int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
assert(errorcode != 0);
+ (void)errorcode;
}
void ThreadLocalImpl::removeInstance() {
diff --git a/contrib/llvm/lib/System/Win32/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
index e37f111..1227552 100644
--- a/contrib/llvm/lib/System/Win32/TimeValue.inc
+++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc
@@ -1,17 +1,17 @@
//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides the Win32 implementation of the TimeValue class.
//
//===----------------------------------------------------------------------===//
-#include "Win32.h"
+#include "Windows.h"
#include <time.h>
namespace llvm {
diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h
new file mode 100644
index 0000000..4a1553b
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/Windows.h
@@ -0,0 +1,120 @@
+//===- Win32/Windows.h - Common Win32 Include File --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Win32 implementations.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+// mingw-w64 tends to define it as 0x0502 in its headers.
+#undef _WIN32_WINNT
+
+// Require at least Windows 2000 API.
+#define _WIN32_WINNT 0x0500
+#define _WIN32_IE 0x0500 // MinGW at it again.
+#define WIN32_LEAN_AND_MEAN
+
+#include "llvm/Config/config.h" // Get build system configuration settings
+#include <windows.h>
+#include <shlobj.h>
+#include <cassert>
+#include <string>
+
+inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
+ if (!ErrMsg)
+ return true;
+ char *buffer = NULL;
+ FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
+ *ErrMsg = prefix + buffer;
+ LocalFree(buffer);
+ return true;
+}
+
+class AutoHandle {
+ HANDLE handle;
+
+public:
+ AutoHandle(HANDLE h) : handle(h) {}
+
+ ~AutoHandle() {
+ if (handle)
+ CloseHandle(handle);
+ }
+
+ operator HANDLE() {
+ return handle;
+ }
+
+ AutoHandle &operator=(HANDLE h) {
+ handle = h;
+ return *this;
+ }
+};
+
+template <class HandleType, uintptr_t InvalidHandle,
+ class DeleterType, DeleterType D>
+class ScopedHandle {
+ HandleType Handle;
+
+public:
+ ScopedHandle() : Handle(InvalidHandle) {}
+ ScopedHandle(HandleType handle) : Handle(handle) {}
+
+ ~ScopedHandle() {
+ if (Handle != HandleType(InvalidHandle))
+ D(Handle);
+ }
+
+ HandleType take() {
+ HandleType temp = Handle;
+ Handle = HandleType(InvalidHandle);
+ return temp;
+ }
+
+ operator HandleType() const { return Handle; }
+
+ ScopedHandle &operator=(HandleType handle) {
+ Handle = handle;
+ return *this;
+ }
+
+ typedef void (*unspecified_bool_type)();
+ static void unspecified_bool_true() {}
+
+ // True if Handle is valid.
+ operator unspecified_bool_type() const {
+ return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ }
+
+ bool operator!() const {
+ return Handle == HandleType(InvalidHandle);
+ }
+};
+
+typedef ScopedHandle<HANDLE, uintptr_t(-1),
+ BOOL (WINAPI*)(HANDLE), ::FindClose>
+ ScopedFindHandle;
+
+namespace llvm {
+template <class T>
+class SmallVectorImpl;
+
+template <class T>
+typename SmallVectorImpl<T>::const_pointer
+c_str(SmallVectorImpl<T> &str) {
+ str.push_back(0);
+ str.pop_back();
+ return str.data();
+}
+} // end namespace llvm.
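The c_str() helper above relies on SmallVector keeping its buffer across a push_back(0)/pop_back() pair: the terminator stays in memory while size() is unchanged, so the vector can be handed to Win32 APIs expecting a NUL-terminated string without copying. A sketch of a caller, written as if it sat in one of the .inc files that include this header (the function name is hypothetical):

// Sketch only: pass a UTF-16 path held in a SmallVector to GetFileAttributesW;
// c_str() is found via argument-dependent lookup on SmallVectorImpl.
static DWORD attributesOf(llvm::SmallVectorImpl<wchar_t> &path_utf16) {
  return ::GetFileAttributesW(c_str(path_utf16));
}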
diff --git a/contrib/llvm/lib/Support/Windows/explicit_symbols.inc b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
new file mode 100644
index 0000000..84862d6
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/explicit_symbols.inc
@@ -0,0 +1,66 @@
+/* in libgcc.a */
+
+#ifdef HAVE__ALLOCA
+ EXPLICIT_SYMBOL(_alloca)
+ EXPLICIT_SYMBOL2(alloca, _alloca);
+#endif
+#ifdef HAVE___ALLOCA
+ EXPLICIT_SYMBOL(__alloca)
+#endif
+#ifdef HAVE___CHKSTK
+ EXPLICIT_SYMBOL(__chkstk)
+#endif
+#ifdef HAVE____CHKSTK
+ EXPLICIT_SYMBOL(___chkstk)
+#endif
+#ifdef HAVE___MAIN
+ EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
+#endif
+
+#ifdef HAVE___ASHLDI3
+ EXPLICIT_SYMBOL(__ashldi3)
+#endif
+#ifdef HAVE___ASHRDI3
+ EXPLICIT_SYMBOL(__ashrdi3)
+#endif
+#ifdef HAVE___CMPDI2 // FIXME: unused
+ EXPLICIT_SYMBOL(__cmpdi2)
+#endif
+#ifdef HAVE___DIVDI3
+ EXPLICIT_SYMBOL(__divdi3)
+#endif
+#ifdef HAVE___FIXDFDI
+ EXPLICIT_SYMBOL(__fixdfdi)
+#endif
+#ifdef HAVE___FIXSFDI
+ EXPLICIT_SYMBOL(__fixsfdi)
+#endif
+#ifdef HAVE___FIXUNSDFDI
+ EXPLICIT_SYMBOL(__fixunsdfdi)
+#endif
+#ifdef HAVE___FIXUNSSFDI
+ EXPLICIT_SYMBOL(__fixunssfdi)
+#endif
+#ifdef HAVE___FLOATDIDF
+ EXPLICIT_SYMBOL(__floatdidf)
+#endif
+#ifdef HAVE___FLOATDISF
+ EXPLICIT_SYMBOL(__floatdisf)
+#endif
+#ifdef HAVE___LSHRDI3
+ EXPLICIT_SYMBOL(__lshrdi3)
+#endif
+#ifdef HAVE___MODDI3
+ EXPLICIT_SYMBOL(__moddi3)
+#endif
+#ifdef HAVE___UDIVDI3
+ EXPLICIT_SYMBOL(__udivdi3)
+#endif
+#ifdef HAVE___UMODDI3
+ EXPLICIT_SYMBOL(__umoddi3)
+#endif
+
+/* msvcrt */
+#if defined(_MSC_VER)
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+#endif
diff --git a/contrib/llvm/lib/Support/Windows/system_error.inc b/contrib/llvm/lib/Support/Windows/system_error.inc
new file mode 100644
index 0000000..37ec81d
--- /dev/null
+++ b/contrib/llvm/lib/Support/Windows/system_error.inc
@@ -0,0 +1,142 @@
+//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Windows specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include <windows.h>
+#include <winerror.h>
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+ LPVOID lpMsgBuf = 0;
+ DWORD retval = ::FormatMessageA(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ ev,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
+ (LPSTR) &lpMsgBuf,
+ 0,
+ NULL);
+ if (retval == 0) {
+ ::LocalFree(lpMsgBuf);
+ return std::string("Unknown error");
+ }
+
+ std::string str( static_cast<LPCSTR>(lpMsgBuf) );
+ ::LocalFree(lpMsgBuf);
+
+ while (str.size()
+ && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
+ str.erase( str.size()-1 );
+ if (str.size() && str[str.size()-1] == '.')
+ str.erase( str.size()-1 );
+ return str;
+}
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+ switch (ev) {
+ MAP_ERR_TO_COND(0, success);
+ // Windows system -> posix_errno decode table ---------------------------//
+ // see WinError.h comments for descriptions of errors
+ MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
+ MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
+ MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
+ MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
+ MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
+ MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
+ MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
+ MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large);
+ MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
+ MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link);
+ MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
+ MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled);
+ MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_SEEK, io_error);
+ MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
+ MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+ MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
+ MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEACCES, permission_denied);
+ MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use);
+ MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available);
+ MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported);
+ MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress);
+ MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
+ MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted);
+ MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused);
+ MAP_ERR_TO_COND(WSAECONNRESET, connection_reset);
+ MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required);
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address);
+ MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable);
+ MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress);
+ MAP_ERR_TO_COND(WSAEINTR, interrupted);
+ MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
+ MAP_ERR_TO_COND(WSAEISCONN, already_connected);
+ MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
+ MAP_ERR_TO_COND(WSAEMSGSIZE, message_size);
+ MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
+ MAP_ERR_TO_COND(WSAENETDOWN, network_down);
+ MAP_ERR_TO_COND(WSAENETRESET, network_reset);
+ MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable);
+ MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space);
+ MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option);
+ MAP_ERR_TO_COND(WSAENOTCONN, not_connected);
+ MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket);
+ MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported);
+ MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type);
+ MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out);
+ MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block);
+ default: return error_condition(ev, system_category());
+ }
+}
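Because default_error_condition() maps raw Win32 numbers onto the portable errc values, callers can compare an error_code from the system category against an errc condition and never inspect the numeric value. A short sketch, written as if it lived next to the code above (same headers and using-directives) and reusing the windows_error() helper that the rest of this import uses to wrap GetLastError():

// Sketch only: ERROR_FILE_NOT_FOUND is a system-category code, but the table
// above makes it compare equal to errc::no_such_file_or_directory.
static bool isMissingFile(DWORD win32Err) {
  error_code ec = windows_error(win32Err);
  return ec == make_error_condition(errc::no_such_file_or_directory);
}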
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index dba46df..80ea740 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -13,13 +13,13 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
-#include "llvm/System/Program.h"
-#include "llvm/System/Process.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Process.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Signals.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
@@ -32,6 +32,13 @@
#if defined(HAVE_FCNTL_H)
# include <fcntl.h>
#endif
+#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV)
+# include <sys/uio.h>
+#endif
+
+#if defined(__CYGWIN__)
+#include <io.h>
+#endif
#if defined(_MSC_VER)
#include <io.h>
@@ -164,7 +171,8 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) {
return write(CurPtr, EndPtr-CurPtr);
}
-raw_ostream &raw_ostream::write_escaped(StringRef Str) {
+raw_ostream &raw_ostream::write_escaped(StringRef Str,
+ bool UseHexEscapes) {
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
unsigned char c = Str[i];
@@ -187,11 +195,18 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str) {
break;
}
- // Always expand to a 3-character octal escape.
- *this << '\\';
- *this << char('0' + ((c >> 6) & 7));
- *this << char('0' + ((c >> 3) & 7));
- *this << char('0' + ((c >> 0) & 7));
+ // Write out the escaped representation.
+ if (UseHexEscapes) {
+ *this << '\\' << 'x';
+    *this << hexdigit((c >> 4) & 0xF);
+ *this << hexdigit((c >> 0) & 0xF);
+ } else {
+ // Always use a full 3-character octal escape.
+ *this << '\\';
+ *this << char('0' + ((c >> 6) & 7));
+ *this << char('0' + ((c >> 3) & 7));
+ *this << char('0' + ((c >> 0) & 7));
+ }
}
}
@@ -363,7 +378,9 @@ void format_object_base::home() {
/// stream should be immediately destroyed; the string will be empty
/// if no error occurred.
raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
- unsigned Flags) : Error(false), pos(0) {
+ unsigned Flags)
+ : Error(false), UseAtomicWrites(false), pos(0)
+{
assert(Filename != 0 && "Filename is null");
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
@@ -410,6 +427,26 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
ShouldClose = true;
}
+/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
+/// ShouldClose is true, this closes the file when the stream is destroyed.
+raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
+ : raw_ostream(unbuffered), FD(fd),
+ ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) {
+#ifdef O_BINARY
+ // Setting STDOUT and STDERR to binary mode is necessary in Win32
+ // to avoid undesirable linefeed conversion.
+ if (fd == STDOUT_FILENO || fd == STDERR_FILENO)
+ setmode(fd, O_BINARY);
+#endif
+
+ // Get the starting position.
+ off_t loc = ::lseek(FD, 0, SEEK_CUR);
+ if (loc == (off_t)-1)
+ pos = 0;
+ else
+ pos = static_cast<uint64_t>(loc);
+}
+
raw_fd_ostream::~raw_fd_ostream() {
if (FD >= 0) {
flush();
@@ -435,7 +472,20 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
pos += Size;
do {
- ssize_t ret = ::write(FD, Ptr, Size);
+ ssize_t ret;
+
+ // Check whether we should attempt to use atomic writes.
+ if (BUILTIN_EXPECT(!UseAtomicWrites, true)) {
+ ret = ::write(FD, Ptr, Size);
+ } else {
+ // Use ::writev() where available.
+#if defined(HAVE_WRITEV)
+ struct iovec IOV = { (void*) Ptr, Size };
+ ret = ::writev(FD, &IOV, 1);
+#else
+ ret = ::write(FD, Ptr, Size);
+#endif
+ }
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
@@ -665,34 +715,3 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
-
-//===----------------------------------------------------------------------===//
-// tool_output_file
-//===----------------------------------------------------------------------===//
-
-tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
- : Filename(filename), Keep(false) {
- // Arrange for the file to be deleted if the process is killed.
- if (Filename != "-")
- sys::RemoveFileOnSignal(sys::Path(Filename));
-}
-
-tool_output_file::CleanupInstaller::~CleanupInstaller() {
- // Delete the file if the client hasn't told us not to.
- if (!Keep && Filename != "-")
- sys::Path(Filename).eraseFromDisk();
-
- // Ok, the file is successfully written and closed, or deleted. There's no
- // further need to clean it up on signals.
- if (Filename != "-")
- sys::DontRemoveFileOnSignal(sys::Path(Filename));
-}
-
-tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
- unsigned Flags)
- : Installer(filename),
- OS(filename, ErrorInfo, Flags) {
- // If open fails, no cleanup is needed.
- if (!ErrorInfo.empty())
- Installer.Keep = true;
-}
diff --git a/contrib/llvm/lib/Support/regexec.c b/contrib/llvm/lib/Support/regexec.c
index 41fb2ea..0078616 100644
--- a/contrib/llvm/lib/Support/regexec.c
+++ b/contrib/llvm/lib/Support/regexec.c
@@ -54,8 +54,9 @@
#include "regex2.h"
/* macros for manipulating states, small version */
-#define states long
-#define states1 states /* for later use in llvm_regexec() decision */
+/* FIXME: the small version assumes 'states' is 'long'. */
+#define states1 long /* for later use in llvm_regexec() decision */
+#define states states1
#define CLEAR(v) ((v) = 0)
#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
diff --git a/contrib/llvm/lib/Support/system_error.cpp b/contrib/llvm/lib/Support/system_error.cpp
new file mode 100644
index 0000000..56898de
--- /dev/null
+++ b/contrib/llvm/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <string>
+#include <cstring>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+ return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+ return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+ return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+ return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+};
+
+const char*
+_generic_error_category::name() const {
+ return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return std::string("unspecified generic_category error");
+#endif // ELAST
+ return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+ static _generic_error_category s;
+ return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+
+const char*
+_system_error_category::name() const {
+ return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+ static _system_error_category s;
+ return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+ return generic_category();
+#else
+ return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+ return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+ return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
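
The category/condition machinery added in this file mirrors what later became C++11's <system_error>. The standard types can be used to illustrate how the pieces fit together (this example deliberately uses the std:: names, not the llvm:: ones introduced above): a raw errno value wrapped in the system category compares equal to the portable generic condition because default_error_condition() maps one onto the other.

// Illustration with C++11 <system_error>, showing the same design the C++03
// port above implements: error_code (platform value + category) versus
// error_condition (portable value + category).
#include <cerrno>
#include <iostream>
#include <system_error>

int main() {
  std::error_code ec(ENOENT, std::system_category());
  std::cout << ec.category().name() << ": " << ec.message() << "\n";
  // equivalent() / default_error_condition() make this comparison true even
  // though the two sides belong to different categories.
  if (ec == std::errc::no_such_file_or_directory)
    std::cout << "maps to the portable 'no such file or directory'\n";
  return 0;
}
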
diff --git a/contrib/llvm/lib/System/Alarm.cpp b/contrib/llvm/lib/System/Alarm.cpp
deleted file mode 100644
index 0014ca7..0000000
--- a/contrib/llvm/lib/System/Alarm.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- Alarm.cpp - Alarm Generation Support ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Alarm functionality
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/System/Alarm.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-using namespace sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only TRULY operating system
-//=== independent code.
-//===----------------------------------------------------------------------===//
-
-}
-
-// Include the platform-specific parts of this class.
-#ifdef LLVM_ON_UNIX
-#include "Unix/Alarm.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Win32/Alarm.inc"
-#endif
diff --git a/contrib/llvm/lib/System/Unix/Alarm.inc b/contrib/llvm/lib/System/Unix/Alarm.inc
deleted file mode 100644
index fb42b6c..0000000
--- a/contrib/llvm/lib/System/Unix/Alarm.inc
+++ /dev/null
@@ -1,72 +0,0 @@
-//===-- Alarm.inc - Implement Unix Alarm Support ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the UNIX Alarm support.
-//
-//===----------------------------------------------------------------------===//
-
-#include <signal.h>
-#include <unistd.h>
-#include <cassert>
-using namespace llvm;
-
-/// AlarmCancelled - This flag is set by the SIGINT signal handler if the
-/// user presses CTRL-C.
-static volatile bool AlarmCancelled = false;
-
-/// AlarmTriggered - This flag is set by the SIGALRM signal handler if the
-/// alarm was triggered.
-static volatile bool AlarmTriggered = false;
-
-/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel.
-/// This ensures that they never do.
-static bool NestedSOI = false;
-
-static RETSIGTYPE SigIntHandler(int Sig) {
- AlarmCancelled = true;
- signal(SIGINT, SigIntHandler);
-}
-
-static RETSIGTYPE SigAlarmHandler(int Sig) {
- AlarmTriggered = true;
-}
-
-static void (*OldSigIntHandler) (int);
-
-void sys::SetupAlarm(unsigned seconds) {
- assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
- NestedSOI = true;
- AlarmCancelled = false;
- AlarmTriggered = false;
- ::signal(SIGALRM, SigAlarmHandler);
- OldSigIntHandler = ::signal(SIGINT, SigIntHandler);
- ::alarm(seconds);
-}
-
-void sys::TerminateAlarm() {
- assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
- ::alarm(0);
- ::signal(SIGALRM, SIG_DFL);
- ::signal(SIGINT, OldSigIntHandler);
- AlarmCancelled = false;
- AlarmTriggered = false;
- NestedSOI = false;
-}
-
-int sys::AlarmStatus() {
- if (AlarmCancelled)
- return -1;
- if (AlarmTriggered)
- return 1;
- return 0;
-}
-
-void sys::Sleep(unsigned n) {
- ::sleep(n);
-}
diff --git a/contrib/llvm/lib/System/Win32/Alarm.inc b/contrib/llvm/lib/System/Win32/Alarm.inc
deleted file mode 100644
index e0d00a0..0000000
--- a/contrib/llvm/lib/System/Win32/Alarm.inc
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Alarm.inc - Implement Win32 Alarm Support ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Win32 Alarm support.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-using namespace llvm;
-
-/// NestedSOI - Sanity check. Alarms cannot be nested or run in parallel.
-/// This ensures that they never do.
-static bool NestedSOI = false;
-
-void sys::SetupAlarm(unsigned seconds) {
- assert(!NestedSOI && "sys::SetupAlarm calls cannot be nested!");
- NestedSOI = true;
- // FIXME: Implement for Win32
-}
-
-void sys::TerminateAlarm() {
- assert(NestedSOI && "sys::TerminateAlarm called without sys::SetupAlarm!");
- // FIXME: Implement for Win32
- NestedSOI = false;
-}
-
-int sys::AlarmStatus() {
- // FIXME: Implement for Win32
- return 0;
-}
-
-// Don't pull in all of the Windows headers.
-extern "C" void __stdcall Sleep(unsigned long);
-
-void sys::Sleep(unsigned n) {
- ::Sleep(n*1000);
-}
diff --git a/contrib/llvm/lib/System/Win32/Win32.h b/contrib/llvm/lib/System/Win32/Win32.h
deleted file mode 100644
index 8f505b1..0000000
--- a/contrib/llvm/lib/System/Win32/Win32.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines things specific to Win32 implementations.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Win32 code that
-//=== is guaranteed to work on *all* Win32 variants.
-//===----------------------------------------------------------------------===//
-
-// Require at least Windows 2000 API.
-#define _WIN32_WINNT 0x0500
-
-#include "llvm/Config/config.h" // Get autoconf configuration settings
-#include "windows.h"
-#include <cassert>
-#include <string>
-
-inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
- if (!ErrMsg)
- return true;
- char *buffer = NULL;
- FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
- NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL);
- *ErrMsg = prefix + buffer;
- LocalFree(buffer);
- return true;
-}
-
-class AutoHandle {
- HANDLE handle;
-
-public:
- AutoHandle(HANDLE h) : handle(h) {}
-
- ~AutoHandle() {
- if (handle)
- CloseHandle(handle);
- }
-
- operator HANDLE() {
- return handle;
- }
-
- AutoHandle &operator=(HANDLE h) {
- handle = h;
- return *this;
- }
-};
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 271ca44..4679f74 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -15,6 +15,7 @@
#ifndef TARGET_ARM_H
#define TARGET_ARM_H
+#include "ARMBaseInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -25,97 +26,17 @@ class ARMBaseTargetMachine;
class FunctionPass;
class JITCodeEmitter;
class formatted_raw_ostream;
+class MCCodeEmitter;
+class TargetAsmBackend;
+class MachineInstr;
+class ARMAsmPrinter;
+class MCInst;
-// Enums corresponding to ARM condition codes
-namespace ARMCC {
- // The CondCodes constants map directly to the 4-bit encoding of the
- // condition field for predicated instructions.
- enum CondCodes { // Meaning (integer) Meaning (floating-point)
- EQ, // Equal Equal
- NE, // Not equal Not equal, or unordered
- HS, // Carry set >, ==, or unordered
- LO, // Carry clear Less than
- MI, // Minus, negative Less than
- PL, // Plus, positive or zero >, ==, or unordered
- VS, // Overflow Unordered
- VC, // No overflow Not unordered
- HI, // Unsigned higher Greater than, or unordered
- LS, // Unsigned lower or same Less than or equal
- GE, // Greater than or equal Greater than or equal
- LT, // Less than Less than, or unordered
- GT, // Greater than Greater than
- LE, // Less than or equal <, ==, or unordered
- AL // Always (unconditional) Always (unconditional)
- };
+MCCodeEmitter *createARMMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
- inline static CondCodes getOppositeCondition(CondCodes CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case HS: return LO;
- case LO: return HS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
- }
-} // namespace ARMCC
-
-inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case ARMCC::EQ: return "eq";
- case ARMCC::NE: return "ne";
- case ARMCC::HS: return "hs";
- case ARMCC::LO: return "lo";
- case ARMCC::MI: return "mi";
- case ARMCC::PL: return "pl";
- case ARMCC::VS: return "vs";
- case ARMCC::VC: return "vc";
- case ARMCC::HI: return "hi";
- case ARMCC::LS: return "ls";
- case ARMCC::GE: return "ge";
- case ARMCC::LT: return "lt";
- case ARMCC::GT: return "gt";
- case ARMCC::LE: return "le";
- case ARMCC::AL: return "al";
- }
-}
-
-namespace ARM_MB {
- // The Memory Barrier Option constants map directly to the 4-bit encoding of
- // the option field for memory barrier operations.
- enum MemBOpt {
- ST = 14,
- ISH = 11,
- ISHST = 10,
- NSH = 7,
- NSHST = 6,
- OSH = 3,
- OSHST = 2
- };
-
- inline static const char *MemBOptToString(unsigned val) {
- switch (val) {
- default: llvm_unreachable("Unknown memory opetion");
- case ST: return "st";
- case ISH: return "ish";
- case ISHST: return "ishst";
- case NSH: return "nsh";
- case NSHST: return "nshst";
- case OSH: return "osh";
- case OSHST: return "oshst";
- }
- }
-} // namespace ARM_MB
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -127,23 +48,16 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
-FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
extern Target TheARMTarget, TheThumbTarget;
-} // end namespace llvm;
-
-// Defines symbolic names for ARM registers. This defines a mapping from
-// register name to register number.
-//
-#include "ARMGenRegisterNames.inc"
-
-// Defines symbolic names for the ARM instructions.
-//
-#include "ARMGenInstrNames.inc"
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+} // end namespace llvm;
#endif
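
The createARMMCCodeEmitter and createARMAsmBackend factories declared above are meant to be handed to the target registry; the wiring in an LLVM tree of this vintage looks roughly like the sketch below. The initializer name is hypothetical and the registry hook names are assumptions here, not something this header guarantees.

// Rough sketch of registering the MC factories; names are assumed.
#include "ARM.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;

extern "C" void LLVMInitializeARMMCPieces() {   // hypothetical entry point
  // Object-file code emission for both the ARM and Thumb targets.
  TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
  TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
  // Fixup application / relaxation backend.
  TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
  TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
}
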
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index d6a8f19..bf4315f 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -16,6 +16,7 @@
include "llvm/Target/Target.td"
+
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -32,6 +33,8 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+ "Restrict VFP3 to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
@@ -43,14 +46,11 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
-// Some processors have multiply-accumulate instructions that don't
-// play nicely with other VFP instructions, and it's generally better
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
// to just not use them.
-// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
-// others as well. We should do more benchmarking and confirm one way or
-// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
- "Disable VFP MAC instructions">;
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+ "Disable VFP / NEON MAC instructions">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -61,6 +61,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+ "Supports Multiprocessing extension">;
// ARM architectures.
def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
@@ -91,6 +94,18 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
include "ARMSchedule.td"
+// ARM processor families.
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
+ "One of the other ARM processor families">;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureHasSlowFPVMLx, FeatureT2XtPk,
+ FeatureFP16]>;
+
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, GenericItineraries, Features>;
@@ -135,25 +150,27 @@ def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
- FeatureHasSlowVMLx]>;
+ FeatureHasSlowFPVMLx]>;
def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
+ [ArchV7A, ProcA8]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureT2XtPk]>;
+ [ArchV7A, ProcA9]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [ArchV7M]>;
@@ -175,6 +192,17 @@ include "ARMInstrInfo.td"
def ARMInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
@@ -182,4 +210,6 @@ def ARMInstrInfo : InstrInfo;
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
+
+ let AssemblyWriters = [ARMAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
index db48100..19fbf05 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAddressingModes.h
@@ -50,6 +50,16 @@ namespace ARM_AM {
}
}
+ static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror: return 3;
+ }
+ }
+
static inline ShiftOpc getShiftOpcForNode(SDValue N) {
switch (N.getOpcode()) {
default: return ARM_AM::no_shift;
@@ -566,6 +576,8 @@ namespace ARM_AM {
return Val;
}
+ AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
} // end namespace ARM_AM
} // end namespace llvm
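
The new getShiftOpcEncoding() packs the shift kind into the two bits the instruction encodings expect (lsl=0, lsr=1, asr=2, ror=3). A standalone mirror of that table, handy for checking encodings by hand; the enum here is an illustrative stand-in, not the real ARM_AM one:

// Standalone mirror of the shift-opcode-to-2-bit-field mapping shown above.
#include <cassert>
#include <cstdio>

enum ShiftOpc { lsl, lsr, asr, ror };   // stand-in for ARM_AM::ShiftOpc

static unsigned getShiftOpcEncoding(ShiftOpc Op) {
  switch (Op) {
  case lsl: return 0;
  case lsr: return 1;
  case asr: return 2;
  case ror: return 3;
  }
  assert(0 && "Unknown shift opc!");
  return ~0u;
}

int main() {
  // e.g. "r1, asr #3": the shift-type field is encoded as 0b10.
  std::printf("asr encodes as %u\n", getShiftOpcEncoding(asr));
  return 0;
}
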
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 0000000..ec23449
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,512 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+};
+
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ ARMELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+ bool isThumbMode; // Currently emitting Thumb code.
+public:
+ ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+ unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name Offset (bits) Size (bits) Flags
+{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc", 1, 8, MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bit immediate, but scattered into two chunks (bits 0-11 and 16-19).
+{ "fixup_arm_movt_hi16", 0, 20, 0 },
+{ "fixup_arm_movw_lo16", 0, 20, 0 },
+{ "fixup_t2_movt_hi16", 0, 20, 0 },
+{ "fixup_t2_movw_lo16", 0, 20, 0 },
+{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: break;
+ case MCAF_Code16:
+ setIsThumb(true);
+ break;
+ case MCAF_Code32:
+ setIsThumb(false);
+ break;
+ }
+ }
+
+ unsigned getPointerSize() const { return 4; }
+ bool isThumb() const { return isThumbMode; }
+ void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME: Thumb targets, different move constant targets..
+ return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+ return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if (isThumb()) {
+ // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+ // use 0x46c0 (which is a 'mov r8, r8' insn).
+ uint64_t NumNops = Count / 2;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write16(0xbf00);
+ if (Count & 1)
+ OW->Write8(0);
+ return true;
+ }
+ // ARM mode
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0xe1a00000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0xa0); break;
+ }
+
+ return true;
+}
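
WriteNopData() pads a gap of Count bytes with NOP encodings (0xbf00 for Thumb-2; 0xe1a00000, i.e. "mov r0, r0", for ARM), plus zero filler for any remainder smaller than one instruction. A quick sketch of the padding arithmetic only:

// Sketch of the padding arithmetic used above: how many full NOPs fit into a
// gap of Count bytes, and how many stray filler bytes remain.
#include <cstdint>
#include <cstdio>

static void planPadding(uint64_t Count, bool Thumb) {
  const uint64_t NopSize = Thumb ? 2 : 4;
  std::printf("%llu bytes -> %llu %s NOPs + %llu filler byte(s)\n",
              (unsigned long long)Count,
              (unsigned long long)(Count / NopSize),
              Thumb ? "Thumb" : "ARM",
              (unsigned long long)(Count % NopSize));
}

int main() {
  planPadding(10, true);   // 5 Thumb NOPs, 0 filler bytes
  planPadding(10, false);  // 2 ARM NOPs, 2 filler bytes
  return 0;
}
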
+
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ return Value;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned Lo12 = Value & 0x0FFF;
+ // inst{19-16} = Hi4;
+ // inst{11-0} = Lo12;
+ Value = (Hi4 << 16) | (Lo12);
+ return Value;
+ }
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value >>= 16;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel: {
+ unsigned Hi4 = (Value & 0xF000) >> 12;
+ unsigned i = (Value & 0x800) >> 11;
+ unsigned Mid3 = (Value & 0x700) >> 8;
+ unsigned Lo8 = Value & 0x0FF;
+ // inst{19-16} = Hi4;
+ // inst{26} = i;
+ // inst{14-12} = Mid3;
+ // inst{7-0} = Lo8;
+ Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
+
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_ldst_pcrel_12:
+ // ARM PC-relative values are offset by 8.
+ Value -= 4;
+ // FALLTHROUGH
+ case ARM::fixup_t2_ldst_pcrel_12: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value -= 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ assert ((Value < 4096) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
+ uint64_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return ((Value - 4) >> 2) & 0xff;
+ case ARM::fixup_arm_adr_pcrel_12: {
+ // ARM PC-relative values are offset by 8.
+ Value -= 8;
+ unsigned opc = 4; // bits {24-21}. Default to add: 0b0100
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 2; // 0b0010
+ }
+ assert(ARM_AM::getSOImmVal(Value) != -1 &&
+ "Out of range pc-relative fixup value!");
+ // Encode the immediate and shift the opcode into place.
+ return ARM_AM::getSOImmVal(Value) | (opc << 21);
+ }
+
+ case ARM::fixup_t2_adr_pcrel_12: {
+ Value -= 4;
+ unsigned opc = 0;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ opc = 5;
+ }
+
+ uint32_t out = (opc << 21);
+ out |= (Value & 0x800) << 14;
+ out |= (Value & 0x700) << 4;
+ out |= (Value & 0x0FF);
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ // These values don't encode the low two bits since they're always zero.
+ // Offset by 8 just as above.
+ return 0xffffff & ((Value - 8) >> 2);
+ case ARM::fixup_t2_uncondbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint32_t out = 0;
+ bool I = Value & 0x800000;
+ bool J1 = Value & 0x400000;
+ bool J2 = Value & 0x200000;
+ J1 ^= I;
+ J2 ^= I;
+
+ out |= I << 26; // S bit
+ out |= !J1 << 13; // J1 bit
+ out |= !J2 << 11; // J2 bit
+ out |= (Value & 0x1FF800) << 5; // imm10 field
+ out |= (Value & 0x0007FF); // imm11 field
+
+ uint64_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_t2_condbranch: {
+ Value = Value - 4;
+ Value >>= 1; // Low bit is not encoded.
+
+ uint64_t out = 0;
+ out |= (Value & 0x80000) << 7; // S bit
+ out |= (Value & 0x40000) >> 7; // J2 bit
+ out |= (Value & 0x20000) >> 4; // J1 bit
+ out |= (Value & 0x1F800) << 5; // imm6 field
+ out |= (Value & 0x007FF); // imm11 field
+
+ uint32_t swapped = (out & 0xFFFF0000) >> 16;
+ swapped |= (out & 0x0000FFFF) << 16;
+ return swapped;
+ }
+ case ARM::fixup_arm_thumb_bl: {
+ // The value doesn't encode the low bit (always zero) and is offset by
+ // four. The value is encoded into disjoint bit positions in the destination
+ // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
+ //
+ // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0x3fffff & ((Value - 4) >> 1);
+ Binary = (Value & 0x7ff) << 16; // Low imm11 value.
+ Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_blx: {
+ // The value doesn't encode the low two bits (always zero) and is offset by
+ // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
+ // positions in the destination opcode. x = unchanged, I = immediate value
+ // bit, S = sign extension bit, 0 = zero.
+ //
+ // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+ //
+ // Note that the halfwords are stored high first, low second; so we need
+ // to transpose the fixup value here to map properly.
+ unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+ uint32_t Binary = 0;
+ Value = 0xfffff & ((Value - 2) >> 2);
+ Binary = (Value & 0x3ff) << 17; // Low imm10L value.
+ Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+ Binary |= isNeg << 10; // Sign bit.
+ return Binary;
+ }
+ case ARM::fixup_arm_thumb_cp:
+ // Offset by 4, and don't encode the low two bits. Two bytes of that
+ // 'off by 4' is implicitly handled by the half-word ordering of the
+ // Thumb encoding, so we only need to adjust by 2 here.
+ return ((Value - 2) >> 2) & 0xff;
+ case ARM::fixup_arm_thumb_cb: {
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ uint32_t Binary = (Value - 4) >> 1;
+ return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+ }
+ case ARM::fixup_arm_thumb_br:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0x7ff;
+ case ARM::fixup_arm_thumb_bcc:
+ // Offset by 4 and don't encode the lower bit, which is always 0.
+ return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10:
+ Value = Value - 4; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ // Fall through.
+ case ARM::fixup_t2_pcrel_10: {
+ // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4;
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // These values don't encode the low two bits since they're always zero.
+ Value >>= 2;
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value |= isAdd << 23;
+
+ // Same addressing mode as fixup_arm_pcrel_10,
+ // but with 16-bit halfwords swapped.
+ if (Kind == ARM::fixup_t2_pcrel_10) {
+ uint32_t swapped = (Value & 0xFFFF0000) >> 16;
+ swapped |= (Value & 0x0000FFFF) << 16;
+ return swapped;
+ }
+
+ return Value;
+ }
+ }
+}
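+
+// Note on the halfword swaps above: 32-bit Thumb-2 instructions are stored as
+// two little-endian halfwords with the high half first, so a fixup value
+// computed in "logical" bit order has to have its 16-bit halves exchanged
+// before being OR'd into the instruction bytes. A standalone illustration of
+// the swap (not part of this patch):
+//
+//   #include <cstdint>
+//   #include <cstdio>
+//
+//   static uint32_t swapHalfwords(uint32_t Value) {
+//     return ((Value & 0xFFFF0000u) >> 16) | ((Value & 0x0000FFFFu) << 16);
+//   }
+//
+//   int main() {
+//     uint32_t Encoded = 0x00AB1234;   // value in logical bit order
+//     // prints 123400ab: the halfwords have traded places.
+//     std::printf("%08x\n", (unsigned)swapHalfwords(Encoded));
+//     return 0;
+//   }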
+
+namespace {
+
+// FIXME: This should be in a separate file.
+// ELF is an ELF of course...
+class ELFARMAsmBackend : public ARMAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFARMAsmBackend(const Target &T, Triple::OSType _OSType)
+ : ARMAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ true);
+ }
+};
+
+// FIXME: Raise this to share code between Darwin and ELF.
+void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = 4; // FIXME: 2 for Thumb
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset % NumBytes == 0 && "Offset mod NumBytes is nonzero!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+// FIXME: This should be in a separate file.
+class DarwinARMAsmBackend : public ARMAsmBackend {
+public:
+ DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ // FIXME: Subtarget info should be derived. Force v7 for now.
+ return createMachObjectWriter(new ARMMachObjectWriter(
+ /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ object::mach::CSARM_V7),
+ OS,
+ /*IsLittleEndian=*/true);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+};
+
+/// getFixupKindNumBytes - The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+
+ case FK_Data_1:
+ case ARM::fixup_arm_thumb_bcc:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_thumb_adr_pcrel_10:
+ return 1;
+
+ case FK_Data_2:
+ case ARM::fixup_arm_thumb_br:
+ case ARM::fixup_arm_thumb_cb:
+ return 2;
+
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ return 3;
+
+ case FK_Data_4:
+ case ARM::fixup_t2_ldst_pcrel_12:
+ case ARM::fixup_t2_condbranch:
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_t2_pcrel_10:
+ case ARM::fixup_t2_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ return 4;
+ }
+}
+
+void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const std::string &TT) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return new DarwinARMAsmBackend(T);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ assert(0 && "Windows not supported on ARM");
+ default:
+ return new ELFARMAsmBackend(T, Triple(TT).getOS());
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 6cfd596..db12b8e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -14,28 +14,31 @@
#define DEBUG_TYPE "asm-printer"
#include "ARM.h"
-#include "ARMBuildAttrs.h"
+#include "ARMAsmPrinter.h"
#include "ARMAddressingModes.h"
+#include "ARMBuildAttrs.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
-#include "AsmPrinter/ARMInstPrinter.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMMCInstLower.h"
+#include "ARMMCExpr.h"
#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -53,270 +56,127 @@
#include <cctype>
using namespace llvm;
-static cl::opt<bool>
-EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
- cl::desc("enable experimental asmprinter gunk in the arm backend"));
-
-namespace llvm {
- namespace ARM {
- enum DW_ISA {
- DW_ISA_ARM_thumb = 1,
- DW_ISA_ARM_arm = 2
- };
- }
-}
-
namespace {
- class ARMAsmPrinter : public AsmPrinter {
-
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const ARMSubtarget *Subtarget;
- /// AFI - Keep a pointer to ARMFunctionInfo for the current
- /// MachineFunction.
- ARMFunctionInfo *AFI;
+ // Per section and per symbol attributes are not supported.
+ // To implement them we would need the ability to delay this emission
+ // until the assembly file is fully parsed/generated as only then do we
+ // know the symbol and section numbers.
+ class AttributeEmitter {
+ public:
+ virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
+ virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
+ virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
+ virtual void Finish() = 0;
+ virtual ~AttributeEmitter() {}
+ };
- /// MCP - Keep a pointer to constantpool entries of the current
- /// MachineFunction.
- const MachineConstantPool *MCP;
+ class AsmAttributeEmitter : public AttributeEmitter {
+ MCStreamer &Streamer;
public:
- explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- }
+ AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
+ void MaybeSwitchVendor(StringRef Vendor) { }
- virtual const char *getPassName() const {
- return "ARM Assembly Printer";
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ Streamer.EmitRawText("\t.eabi_attribute " +
+ Twine(Attribute) + ", " + Twine(Value));
}
- void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
-
- void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
- void printSOImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
- void printSOImm2PartOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printSORegOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode2Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode3Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode4Operand(const MachineInstr *MI, int OpNum,raw_ostream &O,
- const char *Modifier = 0);
- void printAddrMode5Operand(const MachineInstr *MI, int OpNum,raw_ostream &O,
- const char *Modifier = 0);
- void printAddrMode6Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier = 0);
- void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printMemBOption(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printShiftImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
-
- void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printThumbITMask(const MachineInstr *MI, int OpNum, raw_ostream &O);
- void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- unsigned Scale);
- void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
-
- void printT2SOOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
- void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printT2AddrModeImm8s4OffsetOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {}
- void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
-
- void printCPSOptionOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {}
- void printMSRMaskOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {}
- void printNegZeroOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {}
- void printPredicateOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printSBitModifierOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printPCLabel(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printRegisterList(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printCPInstOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier);
- void printJTBlockOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printJT2BlockOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printTBAddrMode(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printNoHashImmediate(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
- void printNEONModImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O);
-
- virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
-
- void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen
- static const char *getRegisterName(unsigned RegNo);
-
- virtual void EmitInstruction(const MachineInstr *MI);
- bool runOnMachineFunction(MachineFunction &F);
-
- virtual void EmitConstantPool() {} // we emit constant pools customly!
- virtual void EmitFunctionEntryLabel();
- void EmitStartOfAsmFile(Module &M);
- void EmitEndOfAsmFile(Module &M);
-
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
- if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- else {
- DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ switch (Attribute) {
+ case ARMBuildAttrs::CPU_name:
+ Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ break;
+ default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
- return Location;
+ }
+ void Finish() { }
+ };
+
+ class ObjectAttributeEmitter : public AttributeEmitter {
+ MCObjectStreamer &Streamer;
+ StringRef CurrentVendor;
+ SmallString<64> Contents;
+
+ public:
+ ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
+ Streamer(Streamer_), CurrentVendor("") { }
+
+ void MaybeSwitchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor.empty())
+ CurrentVendor = Vendor;
+ else if (CurrentVendor == Vendor)
+ return;
+ else
+ Finish();
+
+ CurrentVendor = Vendor;
+
+ assert(Contents.size() == 0);
}
- virtual unsigned getISAEncoding() {
- // ARM/Darwin adds ISA to the DWARF info for each function.
- if (!Subtarget->isTargetDarwin())
- return 0;
- return Subtarget->isThumb() ?
- llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ void EmitAttribute(unsigned Attribute, unsigned Value) {
+ // FIXME: should be ULEB
+ Contents += Attribute;
+ Contents += Value;
}
- MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const;
- MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
-
- /// EmitMachineConstantPoolValue - Print a machine constantpool value to
- /// the .s file.
- virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- EmitMachineConstantPoolValue(MCPV, OS);
- OutStreamer.EmitRawText(OS.str());
+ void EmitTextAttribute(unsigned Attribute, StringRef String) {
+ Contents += Attribute;
+ Contents += UppercaseString(String);
+ Contents += 0;
}
- void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
- raw_ostream &O) {
- switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
- case 1: O << MAI->getData8bitsDirective(0); break;
- case 2: O << MAI->getData16bitsDirective(0); break;
- case 4: O << MAI->getData32bitsDirective(0); break;
- default: assert(0 && "Unknown CPV size");
- }
+ void Finish() {
+ const size_t ContentsSize = Contents.size();
- ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
-
- if (ACPV->isLSDA()) {
- O << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
- } else if (ACPV->isBlockAddress()) {
- O << *GetBlockAddressSymbol(ACPV->getBlockAddress());
- } else if (ACPV->isGlobalValue()) {
- const GlobalValue *GV = ACPV->getGV();
- bool isIndirect = Subtarget->isTargetDarwin() &&
- Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
- if (!isIndirect)
- O << *Mang->getSymbol(GV);
- else {
- // FIXME: Remove this when Darwin transition to @GOT like syntax.
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- O << *Sym;
-
- MachineModuleInfoMachO &MMIMachO =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) :
- MMIMachO.getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
- }
- } else {
- assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- O << *GetExternalSymbolSymbol(ACPV->getSymbol());
- }
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
- if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
- if (ACPV->getPCAdjustment() != 0) {
- O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
- << getFunctionNumber() << "_" << ACPV->getLabelId()
- << "+" << (unsigned)ACPV->getPCAdjustment();
- if (ACPV->mustAddCurrentAddress())
- O << "-.";
- O << ')';
- }
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor, 0);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+ Streamer.EmitBytes(Contents, 0);
+
+ Contents.clear();
}
};
+
} // end of anonymous namespace
-#include "ARMGenAsmWriter.inc"
+MachineLocation ARMAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
- OutStreamer.EmitRawText(StringRef("\t.code\t16"));
- if (!Subtarget->isTargetDarwin())
- OutStreamer.EmitRawText(StringRef("\t.thumb_func"));
- else {
- // This needs to emit to a temporary string to get properly quoted
- // MCSymbols when they have spaces in them.
- SmallString<128> Tmp;
- raw_svector_ostream OS(Tmp);
- OS << "\t.thumb_func\t" << *CurrentFnSym;
- OutStreamer.EmitRawText(OS.str());
- }
+ OutStreamer.EmitAssemblerFlag(MCAF_Code16);
+ OutStreamer.EmitThumbFunc(Subtarget->isTargetDarwin()? CurrentFnSym : 0);
}
OutStreamer.EmitLabel(CurrentFnSym);
}
-/// runOnMachineFunction - This uses the printInstruction()
+/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -337,32 +197,18 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
- if (Modifier && strcmp(Modifier, "dregpair") == 0) {
- unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
- unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
- O << '{'
- << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi)
- << '}';
- } else if (Modifier && strcmp(Modifier, "lane") == 0) {
- unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
- unsigned DReg =
- TM.getRegisterInfo()->getMatchingSuperReg(Reg,
- RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
- O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
- } else {
- assert(!MO.getSubReg() && "Subregs should be eliminated!");
- O << getRegisterName(Reg);
- }
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << ARMInstPrinter::getRegisterName(Reg);
break;
}
case MachineOperand::MO_Immediate: {
int64_t Imm = MO.getImm();
O << '#';
if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
- (TF & ARMII::MO_LO16))
+ (TF == ARMII::MO_LO16))
O << ":lower16:";
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
- (TF & ARMII::MO_HI16))
+ (TF == ARMII::MO_HI16))
O << ":upper16:";
O << Imm;
break;
@@ -371,9 +217,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
O << *MO.getMBB()->getSymbol();
return;
case MachineOperand::MO_GlobalAddress: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
const GlobalValue *GV = MO.getGlobal();
-
if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
(TF & ARMII::MO_LO16))
O << ":lower16:";
@@ -383,18 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
O << *Mang->getSymbol(GV);
printOffset(MO.getOffset(), O);
-
- if (isCallOp && Subtarget->isTargetELF() &&
- TM.getRelocationModel() == Reloc::PIC_)
+ if (TF == ARMII::MO_PLT)
O << "(PLT)";
break;
}
case MachineOperand::MO_ExternalSymbol: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
- if (isCallOp && Subtarget->isTargetELF() &&
- TM.getRelocationModel() == Reloc::PIC_)
+ if (TF == ARMII::MO_PLT)
O << "(PLT)";
break;
}
@@ -407,538 +246,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
}
}
-static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
- const MCAsmInfo *MAI) {
- // Break it up into two parts that make up a shifter immediate.
- V = ARM_AM::getSOImmVal(V);
- assert(V != -1 && "Not a valid so_imm value!");
-
- unsigned Imm = ARM_AM::getSOImmValImm(V);
- unsigned Rot = ARM_AM::getSOImmValRot(V);
-
- // Print low-level immediate formation info, per
- // A5.1.3: "Data-processing operands - Immediate".
- if (Rot) {
- O << "#" << Imm << ", " << Rot;
- // Pretty printed version.
- if (VerboseAsm) {
- O << "\t" << MAI->getCommentString() << ' ';
- O << (int)ARM_AM::rotr32(Imm, Rot);
- }
- } else {
- O << "#" << Imm;
- }
-}
-
-/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
-/// immediate in bits 0-7.
-void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isImm() && "Not a valid so_imm value!");
- printSOImm(O, MO.getImm(), isVerbose(), MAI);
-}
-
-/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
-/// followed by an 'orr' to materialize.
-void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isImm() && "Not a valid so_imm value!");
- unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm());
- unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm());
- printSOImm(O, V1, isVerbose(), MAI);
- O << "\n\torr";
- printPredicateOperand(MI, 2, O);
- O << "\t";
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 0, O);
- O << ", ";
- printSOImm(O, V2, isVerbose(), MAI);
-}
-
-// so_reg is a 4-operand unit corresponding to register forms of the A5.1
-// "Addressing Mode 1 - Data-processing operands" forms. This includes:
-// REG 0 0 - e.g. R5
-// REG REG 0,SH_OPC - e.g. R5, ROR R3
-// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
-void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- const MachineOperand &MO3 = MI->getOperand(Op+2);
-
- O << getRegisterName(MO1.getReg());
-
- // Print the shift opc.
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (MO2.getReg()) {
- O << ' ' << getRegisterName(MO2.getReg());
- assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else if (ShOpc != ARM_AM::rrx) {
- O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
- }
-}
-
-void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- const MachineOperand &MO3 = MI->getOperand(Op+2);
-
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
- return;
- }
-
- O << "[" << getRegisterName(MO1.getReg());
-
- if (!MO2.getReg()) {
- if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
- O << ", #"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm());
- O << "]";
- return;
- }
-
- O << ", "
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << getRegisterName(MO2.getReg());
-
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
- << " #" << ShImm;
- O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
-
- if (!MO1.getReg()) {
- unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs;
- return;
- }
-
- O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << getRegisterName(MO1.getReg());
-
- if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
- << " #" << ShImm;
-}
-
-void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- const MachineOperand &MO3 = MI->getOperand(Op+2);
-
- assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << "[" << getRegisterName(MO1.getReg());
-
- if (MO2.getReg()) {
- O << ", "
- << (char)ARM_AM::getAM3Op(MO3.getImm())
- << getRegisterName(MO2.getReg())
- << "]";
- return;
- }
-
- if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
- O << ", #"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
- << ImmOffs;
- O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op,
- raw_ostream &O){
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
-
- if (MO1.getReg()) {
- O << (char)ARM_AM::getAM3Op(MO2.getImm())
- << getRegisterName(MO1.getReg());
- return;
- }
-
- unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
- << ImmOffs;
-}
-
-void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- O << ARM_AM::getAMSubModeStr(Mode);
- } else if (Modifier && strcmp(Modifier, "wide") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
- if (Mode == ARM_AM::ia)
- O << ".w";
- } else {
- printOperand(MI, Op, O);
- }
-}
-
-void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
-
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
- return;
- }
-
- assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
-
- O << "[" << getRegisterName(MO1.getReg());
-
- if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
- O << ", #"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs*4;
- }
- O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
-
- O << "[" << getRegisterName(MO1.getReg());
- if (MO2.getImm()) {
- // FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << (MO2.getImm() << 3);
- }
- O << "]";
-}
-
-void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op,
- raw_ostream &O){
- const MachineOperand &MO = MI->getOperand(Op);
- if (MO.getReg() == 0)
- O << "!";
- else
- O << ", " << getRegisterName(MO.getReg());
-}
-
-void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
- raw_ostream &O,
- const char *Modifier) {
- if (Modifier && strcmp(Modifier, "label") == 0) {
- printPCLabel(MI, Op+1, O);
- return;
- }
-
- const MachineOperand &MO1 = MI->getOperand(Op);
- assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- O << "[pc, " << getRegisterName(MO1.getReg()) << "]";
-}
-
-void
-ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(Op);
- uint32_t v = ~MO.getImm();
- int32_t lsb = CountTrailingZeros_32(v);
- int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
- assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << "#" << lsb << ", #" << width;
-}
-
-void
-ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val);
-}
-
-void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- unsigned ShiftOp = MI->getOperand(OpNum).getImm();
- ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
- switch (Opc) {
- case ARM_AM::no_shift:
- return;
- case ARM_AM::lsl:
- O << ", lsl #";
- break;
- case ARM_AM::asr:
- O << ", asr #";
- break;
- default:
- assert(0 && "unexpected shift opcode for shift immediate operand");
- }
- O << ARM_AM::getSORegOffset(ShiftOp);
-}
-
-//===--------------------------------------------------------------------===//
-
-void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- O << "#" << MI->getOperand(Op).getImm() * 4;
-}
-
-void
-ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- // (3 - the number of trailing zeros) is the number of then / else.
- unsigned Mask = MI->getOperand(Op).getImm();
- unsigned CondBit0 = Mask >> 4 & 1;
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
- bool T = ((Mask >> Pos) & 1) == CondBit0;
- if (T)
- O << 't';
- else
- O << 'e';
- }
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg());
- O << ", " << getRegisterName(MO2.getReg()) << "]";
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
- raw_ostream &O,
- unsigned Scale) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- const MachineOperand &MO3 = MI->getOperand(Op+2);
-
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
- return;
- }
-
- O << "[" << getRegisterName(MO1.getReg());
- if (MO3.getReg())
- O << ", " << getRegisterName(MO3.getReg());
- else if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs * Scale;
- O << "]";
-}
-
-void
-ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 1);
-}
-void
-ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 2);
-}
-void
-ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 4);
-}
-
-void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(Op);
- const MachineOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs*4;
- O << "]";
-}
-
-//===--------------------------------------------------------------------===//
-
-// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
-// register with shift forms.
-// REG 0 0 - e.g. R5
-// REG IMM, SH_OPC - e.g. R5, LSL #3
-void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
- unsigned Reg = MO1.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
- O << getRegisterName(Reg);
-
- // Print the shift opc.
- assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
- O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
- if (ShOpc != ARM_AM::rrx)
- O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
-}
-
-void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
- O << "[" << getRegisterName(MO1.getReg());
-
- unsigned OffImm = MO2.getImm();
- if (OffImm) // Don't print +0.
- O << ", #" << OffImm;
- O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
- O << "[" << getRegisterName(MO1.getReg());
-
- int32_t OffImm = (int32_t)MO2.getImm();
- // Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm;
- else if (OffImm > 0)
- O << ", #" << OffImm;
- O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
- O << "[" << getRegisterName(MO1.getReg());
-
- int32_t OffImm = (int32_t)MO2.getImm() / 4;
- // Don't print +0.
- if (OffImm < 0)
- O << ", #-" << -OffImm * 4;
- else if (OffImm > 0)
- O << ", #" << OffImm * 4;
- O << "]";
-}
-
-void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- int32_t OffImm = (int32_t)MO1.getImm();
- // Don't print +0.
- if (OffImm < 0)
- O << "#-" << -OffImm;
- else if (OffImm > 0)
- O << "#" << OffImm;
-}
-
-void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1);
- const MachineOperand &MO3 = MI->getOperand(OpNum+2);
-
- O << "[" << getRegisterName(MO1.getReg());
-
- assert(MO2.getReg() && "Invalid so_reg load / store address!");
- O << ", " << getRegisterName(MO2.getReg());
-
- unsigned ShAmt = MO3.getImm();
- if (ShAmt) {
- assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl #" << ShAmt;
- }
- O << "]";
-}
-
-
//===--------------------------------------------------------------------===//
-void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
- if (CC != ARMCC::AL)
- O << ARMCondCodeToString(CC);
-}
-
-void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI,
- int OpNum,
- raw_ostream &O) {
- ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
- O << ARMCondCodeToString(CC);
-}
-
-void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O){
- unsigned Reg = MI->getOperand(OpNum).getReg();
- if (Reg) {
- assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
- O << 's';
- }
-}
-
-void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- int Id = (int)MI->getOperand(OpNum).getImm();
- O << MAI->getPrivateGlobalPrefix()
- << "PC" << getFunctionNumber() << "_" << Id;
-}
-
-void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "{";
- for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).isImplicit())
- continue;
- if ((int)i != OpNum) O << ", ";
- printOperand(MI, i, O);
- }
- O << "}";
-}
-
-void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O, const char *Modifier) {
- assert(Modifier && "This operand only works with a modifier!");
- // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
- // data itself.
- if (!strcmp(Modifier, "label")) {
- unsigned ID = MI->getOperand(OpNum).getImm();
- OutStreamer.EmitLabel(GetCPISymbol(ID));
- } else {
- assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
- unsigned CPI = MI->getOperand(OpNum).getIndex();
-
- const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
-
- if (MCPE.isMachineConstantPoolEntry()) {
- EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
- } else {
- EmitGlobalConstant(MCPE.Val.ConstVal);
- }
- }
-}
-
MCSymbol *ARMAsmPrinter::
GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const {
@@ -957,126 +266,12 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
return OutContext.GetOrCreateSymbol(Name.str());
}
-void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!");
-
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
- unsigned JTI = MO1.getIndex();
- MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
- // Can't use EmitLabel until instprinter happens, label comes out in the wrong
- // order.
- O << "\n" << *JTISymbol << ":\n";
-
- const char *JTEntryDirective = MAI->getData32bitsDirective();
-
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- bool UseSet= MAI->hasSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
- SmallPtrSet<MachineBasicBlock*, 8> JTSets;
- for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
- MachineBasicBlock *MBB = JTBBs[i];
- bool isNew = JTSets.insert(MBB);
-
- if (UseSet && isNew) {
- O << "\t.set\t"
- << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ','
- << *MBB->getSymbol() << '-' << *JTISymbol << '\n';
- }
-
- O << JTEntryDirective << ' ';
- if (UseSet)
- O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB);
- else if (TM.getRelocationModel() == Reloc::PIC_)
- O << *MBB->getSymbol() << '-' << *JTISymbol;
- else
- O << *MBB->getSymbol();
-
- if (i != e-1)
- O << '\n';
- }
-}
-
-void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO1 = MI->getOperand(OpNum);
- const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
- unsigned JTI = MO1.getIndex();
-
- MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
- // Can't use EmitLabel until instprinter happens, label comes out in the wrong
- // order.
- O << "\n" << *JTISymbol << ":\n";
-
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- bool ByteOffset = false, HalfWordOffset = false;
- if (MI->getOpcode() == ARM::t2TBB)
- ByteOffset = true;
- else if (MI->getOpcode() == ARM::t2TBH)
- HalfWordOffset = true;
-
- for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
- MachineBasicBlock *MBB = JTBBs[i];
- if (ByteOffset)
- O << MAI->getData8bitsDirective();
- else if (HalfWordOffset)
- O << MAI->getData16bitsDirective();
-
- if (ByteOffset || HalfWordOffset)
- O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
- else
- O << "\tb.w " << *MBB->getSymbol();
-
- if (i != e-1)
- O << '\n';
- }
-}
-
-void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
- if (MI->getOpcode() == ARM::t2TBH)
- O << ", lsl #1";
- O << ']';
-}
-void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << MI->getOperand(OpNum).getImm();
-}
-
-void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << FP->getValueAPF().convertToFloat();
- if (isVerbose()) {
- O << "\t\t" << MAI->getCommentString() << ' ';
- WriteAsOperand(O, FP, /*PrintType=*/false);
- }
-}
-
-void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const ConstantFP *FP = MI->getOperand(OpNum).getFPImm();
- O << '#' << FP->getValueAPF().convertToDouble();
- if (isVerbose()) {
- O << "\t\t" << MAI->getCommentString() << ' ';
- WriteAsOperand(O, FP, /*PrintType=*/false);
- }
-}
-
-void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- unsigned EncodedImm = MI->getOperand(OpNum).getImm();
- unsigned EltBits;
- uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x" << utohexstr(Val);
+MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const {
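+  // Builds a function-local label of the form "<private prefix>SJLJEH<function number>";
+  // the SjLj setjmp expansion in EmitInstruction() below branches to it.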
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH"
+ << getFunctionNumber();
+ return OutContext.GetOrCreateSymbol(Name.str());
}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -1090,14 +285,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
default: return true; // Unknown modifier.
case 'a': // Print as a memory address.
if (MI->getOperand(OpNum).isReg()) {
- O << "[" << getRegisterName(MI->getOperand(OpNum).getReg()) << "]";
+ O << "["
+ << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
+ << "]";
return false;
}
// Fallthrough
case 'c': // Don't print "#" before an immediate operand.
if (!MI->getOperand(OpNum).isImm())
return true;
- printNoHashImmediate(MI, OpNum, O);
+ O << MI->getOperand(OpNum).getImm();
return false;
case 'P': // Print a VFP double precision register.
case 'q': // Print a NEON quad precision register.
@@ -1106,7 +303,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
case 'Q':
case 'R':
case 'H':
- report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!");
+ // These modifiers are not yet supported.
return true;
}
}
@@ -1124,48 +321,10 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "[" << getRegisterName(MO.getReg()) << "]";
+ O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]";
return false;
}
-void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- if (EnableMCInst) {
- printInstructionThroughMCStreamer(MI);
- return;
- }
-
- if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
- EmitAlignment(2);
-
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- if (MI->getOpcode() == ARM::DBG_VALUE) {
- unsigned NOps = MI->getNumOperands();
- assert(NOps==4);
- OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- OS << V.getName();
- OS << " <- ";
- // Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
- OS << ']';
- OS << "+";
- printOperand(MI, NOps-2, OS);
- OutStreamer.EmitRawText(OS.str());
- return;
- }
-
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
-
- // Make sure the instruction that follows TBB is 2-byte aligned.
- // FIXME: Constant island pass should insert an "ALIGN" instruction instead.
- if (MI->getOpcode() == ARM::t2TBB)
- EmitAlignment(1);
-}
-
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget->isTargetDarwin()) {
Reloc::Model RelocM = TM.getRelocationModel();
@@ -1205,49 +364,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
// Use unified assembler syntax.
- OutStreamer.EmitRawText(StringRef("\t.syntax unified"));
+ OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
- // CPU Type
- std::string CPUString = Subtarget->getCPUString();
- if (CPUString != "generic")
- OutStreamer.EmitRawText("\t.cpu " + Twine(CPUString));
-
- // FIXME: Emit FPU type
- if (Subtarget->hasVFP2())
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::VFP_arch) + ", 2");
-
- // Signal various FP modes.
- if (!UnsafeFPMath) {
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_FP_denormal) + ", 1");
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
- }
- if (NoInfsFPMath && NoNaNsFPMath)
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
- else
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 3");
-
- // 8-bytes alignment stuff.
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_align8_needed) + ", 1");
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_align8_preserved) + ", 1");
-
- // Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_HardFP_use) + ", 3");
- OutStreamer.EmitRawText("\t.eabi_attribute " +
- Twine(ARMBuildAttrs::ABI_VFP_args) + ", 1");
- }
- // FIXME: Should we signal R9 usage?
+ emitAttributes();
}
}
@@ -1280,10 +402,10 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
else
// Internal to current translation unit.
//
- // When we place the LSDA into the TEXT section, the type info pointers
- // need to be indirect and pc-rel. We accomplish this by using NLPs.
- // However, sometimes the types are local to the file. So we need to
- // fill in the value for the NLP in those cases.
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
4/*size*/, 0/*addrspace*/);
@@ -1321,38 +443,631 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
//===----------------------------------------------------------------------===//
+// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+// FIXME:
+// The following seem like one-off assembler flags, but they actually need
+// to appear in the .ARM.attributes section in ELF.
+// Instead of subclassing the MCELFStreamer, we do the work here.
+
+void ARMAsmPrinter::emitAttributes() {
+
+ emitARMAttributeSection();
+
+ AttributeEmitter *AttrEmitter;
+ if (OutStreamer.hasRawTextSupport())
+ AttrEmitter = new AsmAttributeEmitter(OutStreamer);
+ else {
+ MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
+ AttrEmitter = new ObjectAttributeEmitter(O);
+ }
+
+ AttrEmitter->MaybeSwitchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString == "cortex-a8" ||
+ Subtarget->isCortexA8()) {
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
+    // FIXME: Figure out when this is emitted.
+ //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
+ // ARMBuildAttrs::AllowWMMXv1);
+ //
+
+    // Add additional else-cases here.
+ } else if (CPUString == "generic") {
+ // FIXME: Why these defaults?
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ }
-void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
- ARMMCInstLower MCInstLowering(OutContext, *Mang, *this);
- switch (MI->getOpcode()) {
- case ARM::t2MOVi32imm:
- assert(0 && "Should be lowered by thumb2it pass");
+ // FIXME: Emit FPU type
+ if (Subtarget->hasVFP2())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2);
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::Allowed);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
+ }
+
+ if (NoInfsFPMath && NoNaNsFPMath)
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
+ else
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
+
+ // FIXME: add more flags to ARMBuildAttrs.h
+ // 8-bytes alignment stuff.
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // Hard float. Use both S and D registers and conform to AAPCS-VFP.
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
+ }
+ // FIXME: Should we signal R9 usage?
+
+ if (Subtarget->hasDivide())
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+ AttrEmitter->Finish();
+ delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (OutStreamer.hasRawTextSupport())
+ return;
+
+ const ARMElfTargetObjectFile &TLOFELF =
+ static_cast<const ARMElfTargetObjectFile &>
+ (getObjFileLowering());
+
+ OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+ // Format version
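+  // (0x41 is ASCII 'A', the format-version byte for a build attributes section.)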
+ OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
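+  // For example, Prefix "L", FunctionNumber 3 and LabelId 0 produce the symbol "LPC3_0".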
+ MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+ case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
+ case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
+ case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+ case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
+ case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
+ }
+ return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+ bool isIndirect = Subtarget->isTargetDarwin() &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+ if (!isIndirect)
+ return Mang->getSymbol(GV);
+
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+ MMIMachO.getGVStubEntry(MCSym);
+ if (StubSym.getPointer() == 0)
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+ MCSymbol *MCSym;
+ if (ACPV->isLSDA()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+ MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ } else if (ACPV->isBlockAddress()) {
+ MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+ } else if (ACPV->isGlobalValue()) {
+ const GlobalValue *GV = ACPV->getGV();
+ MCSym = GetARMGVSymbol(GV);
+ } else {
+ assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+ MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+ }
+
+ // Create an MCSymbol for the reference.
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+ OutContext);
+
+ if (ACPV->getPCAdjustment()) {
+ MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ ACPV->getLabelId(),
+ OutContext);
+ const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+ PCRelExpr =
+ MCBinaryExpr::CreateAdd(PCRelExpr,
+ MCConstantExpr::Create(ACPV->getPCAdjustment(),
+ OutContext),
+ OutContext);
+ if (ACPV->mustAddCurrentAddress()) {
+ // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+      // label, so just emit a local label here and reference that instead.
+ MCSymbol *DotSym = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(DotSym);
+ const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+ PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+ }
+ Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+ }
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = 1;
+ if (Opcode == ARM::BR_JTadd)
+ OpNum = 2;
+ else if (Opcode == ARM::BR_JTm)
+ OpNum = 3;
+
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ // Construct an MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr)
+ //
+ // For example, a table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .word (LBB0 - LJTI_0_0)
+ // .word (LBB1 - LJTI_0_0)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+ OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, 4);
+ }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+
+ // Emit a label for the jump table.
+ MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+ OutStreamer.EmitLabel(JTISymbol);
+
+ // Emit each entry of the table.
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
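+  // t2TBB_JT entries are byte offsets and t2TBH_JT entries are halfword offsets;
+  // for any other jump table each entry is emitted as a full Thumb2 branch below.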
+ unsigned OffsetWidth = 4;
+ if (MI->getOpcode() == ARM::t2TBB_JT)
+ OffsetWidth = 1;
+ else if (MI->getOpcode() == ARM::t2TBH_JT)
+ OffsetWidth = 2;
+
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+ OutContext);
+ // If this isn't a TBB or TBH, the entries are direct branch instructions.
+ if (OffsetWidth == 4) {
+ MCInst BrInst;
+ BrInst.setOpcode(ARM::t2B);
+ BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+ OutStreamer.EmitInstruction(BrInst);
+ continue;
+ }
+ // Otherwise it's an offset from the dispatch instruction. Construct an
+ // MCExpr for the entry. We want a value of the form:
+ // (BasicBlockAddr - TableBeginAddr) / 2
+ //
+ // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
+ // would look like:
+ // LJTI_0_0:
+ // .byte (LBB0 - LJTI_0_0) / 2
+ // .byte (LBB1 - LJTI_0_0) / 2
+ const MCExpr *Expr =
+ MCBinaryExpr::CreateSub(MBBSymbolExpr,
+ MCSymbolRefExpr::Create(JTISymbol, OutContext),
+ OutContext);
+ Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(Expr, OffsetWidth);
+ }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+ const MCSymbol *Label,
+ unsigned pred, unsigned ccreg,
+ MCContext &Ctx) {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(Dest));
+ Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ // Add predicate operands.
+ Inst.addOperand(MCOperand::CreateImm(pred));
+ Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+ unsigned Opcode) {
+ MCInst TmpInst;
+
+ // Emit the instruction as usual, just patch the opcode.
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(Opcode);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
default: break;
- case ARM::PICADD: { // FIXME: Remove asm string from td file.
+ case ARM::t2ADDrSPi:
+ case ARM::t2ADDrSPi12:
+ case ARM::t2SUBrSPi:
+ case ARM::t2SUBrSPi12:
+ assert ((MI->getOperand(1).getReg() == ARM::SP) &&
+ "Unexpected source register!");
+ break;
+
+ case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
+ case ARM::tBfar: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBL);
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MI->getOperand(0).getMBB()->getSymbol(), OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrel:
+ case ARM::tLEApcrel:
+ case ARM::t2LEApcrel: {
+ // FIXME: Need to also handle globals and externals
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetCPISymbol(MI->getOperand(1).getIndex()),
+ MI->getOperand(2).getImm(), MI->getOperand(3).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LEApcrelJT:
+ case ARM::tLEApcrelJT:
+ case ARM::t2LEApcrelJT: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR
+ : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
+ : ARM::ADR));
+ populateADROperands(TmpInst, MI->getOperand(0).getReg(),
+ GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(),
+ MI->getOperand(2).getImm()),
+ MI->getOperand(3).getImm(), MI->getOperand(4).getReg(),
+ OutContext);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVPCRX: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::BXr9_CALL:
+ case ARM::BX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::BMOVPCRXr9_CALL:
+ case ARM::BMOVPCRX_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+
+ unsigned TF = MI->getOperand(1).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
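+      // The PC reads as the instruction address plus 8 in ARM state and plus 4 in
+      // Thumb, hence the different adjustment for the two encodings.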
+ unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel
+ ? ARM::MOVTi16 : ARM::t2MOVTi16);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+
+ unsigned TF = MI->getOperand(2).getTargetFlags();
+ bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC;
+ const GlobalValue *GV = MI->getOperand(2).getGlobal();
+ MCSymbol *GVSym = GetARMGVSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ if (isPIC) {
+ MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
+ const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+ unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
+ const MCExpr *PCRelExpr =
+ ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
+ MCBinaryExpr::CreateAdd(LabelSymExpr,
+ MCConstantExpr::Create(PCAdj, OutContext),
+ OutContext), OutContext), OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr));
+ } else {
+ const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(RefExpr));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tPICADD: {
// This is a pseudo op for a label + instruction sequence, which looks like:
// LPC0:
- // add r0, pc, r0
+ // add r0, pc
// This adds the address of LPC0 to r0.
// Emit the label.
- // FIXME: MOVE TO SHARED PLACE.
- unsigned Id = (unsigned)MI->getOperand(2).getImm();
- const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
- + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
- OutStreamer.EmitLabel(Label);
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+ // Form and emit the add.
+ MCInst AddInst;
+ AddInst.setOpcode(ARM::tADDhirr);
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ AddInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(AddInst);
+ return;
+ }
+ case ARM::PICADD: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // add r0, pc, r0
+ // This adds the address of LPC0 to r0.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
- // Form and emit tha dd.
+ // Form and emit the add.
MCInst AddInst;
AddInst.setOpcode(ARM::ADDrr);
AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
AddInst.addOperand(MCOperand::CreateReg(ARM::PC));
AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ // Add 's' bit operand (always reg0 for this)
+ AddInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(AddInst);
return;
}
- case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file.
+ case ARM::PICSTR:
+ case ARM::PICSTRB:
+ case ARM::PICSTRH:
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICLDRH:
+ case ARM::PICLDRSB:
+ case ARM::PICLDRSH: {
+ // This is a pseudo op for a label + instruction sequence, which looks like:
+ // LPC0:
+ // OP r0, [pc, r0]
+    // The LPC0 label is referenced by a constant pool entry in order to get
+    // a PC-relative address at the load or store instruction.
+
+ // Emit the label.
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(),
+ getFunctionNumber(), MI->getOperand(2).getImm(),
+ OutContext));
+
+    // Form and emit the load or store
+ unsigned Opcode;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case ARM::PICSTR: Opcode = ARM::STRrs; break;
+ case ARM::PICSTRB: Opcode = ARM::STRBrs; break;
+ case ARM::PICSTRH: Opcode = ARM::STRH; break;
+ case ARM::PICLDR: Opcode = ARM::LDRrs; break;
+ case ARM::PICLDRB: Opcode = ARM::LDRBrs; break;
+ case ARM::PICLDRH: Opcode = ARM::LDRH; break;
+ case ARM::PICLDRSB: Opcode = ARM::LDRSB; break;
+ case ARM::PICLDRSH: Opcode = ARM::LDRSH; break;
+ }
+ MCInst LdStInst;
+ LdStInst.setOpcode(Opcode);
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ LdStInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ LdStInst.addOperand(MCOperand::CreateImm(0));
+ // Add predicate operands.
+ LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
+ LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg()));
+ OutStreamer.EmitInstruction(LdStInst);
+
+ return;
+ }
+ case ARM::CONSTPOOL_ENTRY: {
/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
@@ -1371,100 +1086,450 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
return;
}
- case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
- // This is a hack that lowers as a two instruction sequence.
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
+ case ARM::t2BR_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVgpr2gpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::t2TBB_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBB);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ // Make sure the next instruction is 2-byte aligned.
+ EmitAlignment(1);
+ return;
+ }
+ case ARM::t2TBH_JT: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(ARM::t2TBH);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJump2Table(MI);
+ return;
+ }
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // mov pc, target
+ MCInst TmpInst;
+ unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
+ ARM::MOVr : ARM::tMOVgpr2gpr;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ if (Opc == ARM::MOVr)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Make sure the Thumb jump table is 4-byte aligned.
+ if (Opc == ARM::tMOVgpr2gpr)
+ EmitAlignment(2);
- unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
- unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTm: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // ldr pc, target
+ MCInst TmpInst;
+ if (MI->getOperand(1).getReg() == 0) {
+ // literal offset
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
+ } else {
+ TmpInst.setOpcode(ARM::LDRrs);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ }
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::BR_JTadd: {
+ // Lower and emit the instruction itself, then the jump table following it.
+ // add pc, target, idx
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDrr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Output the data for the jump table itself
+ EmitJumpTable(MI);
+ return;
+ }
+ case ARM::TRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.long 0xe7ffdefe @ trap
+ uint32_t Val = 0xe7ffdefeUL;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 4);
+ return;
+ }
+ break;
+ }
+ case ARM::tTRAP: {
+ // Non-Darwin binutils don't yet support the "trap" mnemonic.
+ // FIXME: Remove this special case when they do.
+ if (!Subtarget->isTargetDarwin()) {
+ //.short 57086 @ trap
+ uint16_t Val = 0xdefe;
+ OutStreamer.AddComment("trap");
+ OutStreamer.EmitIntValue(Val, 2);
+ return;
+ }
+ break;
+ }
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // mov $val, pc
+ // adds $val, #7
+ // str $val, [$src, #4]
+ // movs r0, #0
+ // b 1f
+ // movs r0, #1
+ // 1:
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+ MCSymbol *Label = GetARMSJLJEHLabel();
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi);
- TmpInst.addOperand(MCOperand::CreateReg(DstReg));
- TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+ TmpInst.setOpcode(ARM::tMOVgpr2tgpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDi3);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ // 's' bit operand
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateImm(7));
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
- TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
-
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::ORRri);
- TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
- TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // inreg
- TmpInst.addOperand(MCOperand::CreateImm(SOImmValV2)); // so_imm
+ TmpInst.setOpcode(ARM::tSTRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #4. The operand value is scaled by 4 for the
+ // tSTR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(1));
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVi8);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ OutStreamer.EmitLabel(Label);
+ return;
+ }
- TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::Int_eh_sjlj_setjmp: {
+ // Two incoming args: GPR:$src, GPR:$val
+ // add $val, pc, #8
+ // str $val, [$src, #+4]
+ // mov r0, #0
+ // add pc, pc, #0
+ // mov r0, #1
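+    // The "add pc, pc, #0" jumps two instructions ahead (pc reads as the
+    // instruction address plus 8), so the normal setjmp return skips the
+    // trailing "mov r0, #1".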
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ValReg = MI->getOperand(1).getReg();
+
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp begin");
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::STRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ValReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::ADDri);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVi);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // 's' bit operand (always reg0 for this).
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("eh_setjmp end");
OutStreamer.EmitInstruction(TmpInst);
}
return;
}
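(A sketch of the address arithmetic behind the ARM setjmp sequence above, illustrative only; it relies on the fact that in ARM state a read of PC returns the address of the current instruction plus 8:

    @ with the ADD at address A
    add $val, pc, #8      @ A        $val = (A + 8) + 8 = A + 16
    str $val, [$src, #4]  @ A + 4
    mov r0, #0            @ A + 8    setjmp's direct return value
    add pc, pc, #0        @ A + 12   pc = (A + 20) + 0, skipping the next insn
    mov r0, #1            @ A + 16   longjmp branches here, so it returns 1
)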
- case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
- // This is a hack that lowers as a two instruction sequence.
- unsigned DstReg = MI->getOperand(0).getReg();
- const MachineOperand &MO = MI->getOperand(1);
- MCOperand V1, V2;
- if (MO.isImm()) {
- unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
- V1 = MCOperand::CreateImm(ImmVal & 65535);
- V2 = MCOperand::CreateImm(ImmVal >> 16);
- } else if (MO.isGlobal()) {
- MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
- const MCSymbolRefExpr *SymRef1 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_LO16, OutContext);
- const MCSymbolRefExpr *SymRef2 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_HI16, OutContext);
- V1 = MCOperand::CreateExpr(SymRef1);
- V2 = MCOperand::CreateExpr(SymRef2);
- } else {
- MI->dump();
- llvm_unreachable("cannot handle this operand");
+ case ARM::Int_eh_sjlj_longjmp: {
+ // ldr sp, [$src, #8]
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(8));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
}
-
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVi16);
- TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
- TmpInst.addOperand(V1); // lower16(imm)
-
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(4));
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
-
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVTi16);
- TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
- TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
- TmpInst.addOperand(V2); // upper16(imm)
-
+ TmpInst.setOpcode(ARM::LDRi12);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
// Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::BX);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
-
return;
}
+ case ARM::tInt_eh_sjlj_longjmp: {
+ // ldr $scratch, [$src, #8]
+ // mov sp, $scratch
+ // ldr $scratch, [$src, #4]
+ // ldr r7, [$src]
+ // bx $scratch
+ unsigned SrcReg = MI->getOperand(0).getReg();
+ unsigned ScratchReg = MI->getOperand(1).getReg();
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ // The offset immediate is #8. The operand value is scaled by 4 for the
+ // tLDR instruction.
+ TmpInst.addOperand(MCOperand::CreateImm(2));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVtgpr2gpr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRi);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateImm(1));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
+ TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tBX_RET_vararg);
+ TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ // These are the pseudos created to comply with stricter operand restrictions
+ // on ARMv5. Lower them now to "normal" instructions, since all the
+ // restrictions are already satisfied.
+ case ARM::MULv5:
+ EmitPatchedInstruction(MI, ARM::MUL);
+ return;
+ case ARM::MLAv5:
+ EmitPatchedInstruction(MI, ARM::MLA);
+ return;
+ case ARM::SMULLv5:
+ EmitPatchedInstruction(MI, ARM::SMULL);
+ return;
+ case ARM::UMULLv5:
+ EmitPatchedInstruction(MI, ARM::UMULL);
+ return;
+ case ARM::SMLALv5:
+ EmitPatchedInstruction(MI, ARM::SMLAL);
+ return;
+ case ARM::UMLALv5:
+ EmitPatchedInstruction(MI, ARM::UMLAL);
+ return;
+ case ARM::UMAALv5:
+ EmitPatchedInstruction(MI, ARM::UMAAL);
+ return;
}
MCInst TmpInst;
- MCInstLowering.Lower(MI, TmpInst);
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
OutStreamer.EmitInstruction(TmpInst);
}
@@ -1476,7 +1541,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, false);
+ return new ARMInstPrinter(MAI);
return 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 0000000..5852684
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,112 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+  /// MCP - Keep a pointer to the constant pool of the current
+  /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void EmitConstantPool() {} // we emit constant pools ourselves!
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 0000000..a56cc1a
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,249 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only define enum types, and
+// so are safe to include here.
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
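(A minimal usage sketch for the condition-code helpers above, illustrative only; it assumes ARMBaseInfo.h and the TableGen'erated .inc files it includes are on the include path:

    #include "ARMBaseInfo.h"
    #include <cstdio>

    // Print a condition mnemonic together with its inverse, e.g. "eq -> ne".
    static void printCondAndInverse(llvm::ARMCC::CondCodes CC) {
      std::printf("%s -> %s\n",
                  llvm::ARMCondCodeToString(CC),
                  llvm::ARMCondCodeToString(llvm::ARMCC::getOppositeCondition(CC)));
    }
    // printCondAndInverse(llvm::ARMCC::EQ) prints "eq -> ne". Note that AL has
    // no inverse and would hit the llvm_unreachable above.
)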
+
+namespace ARM_PROC {
+ enum IMod {
+ IE = 2,
+ ID = 3
+ };
+
+ enum IFlags {
+ F = 1,
+ I = 2,
+ A = 4
+ };
+
+ inline static const char *IFlagsToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown iflags operand");
+ case F: return "f";
+ case I: return "i";
+ case A: return "a";
+ }
+ }
+
+ inline static const char *IModToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown imod operand");
+ case IE: return "ie";
+ case ID: return "id";
+ }
+ }
+}
+
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ SY = 15,
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory operation");
+ case SY: return "sy";
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
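(A quick sketch of what getARMRegisterNumbering yields, illustrative values only, assuming the header above is included:

    unsigned LRNum  = llvm::getARMRegisterNumbering(llvm::ARM::LR);  // 14
    unsigned Q3Num  = llvm::getARMRegisterNumbering(llvm::ARM::Q3);  // 3
    unsigned D17Num = llvm::getARMRegisterNumbering(llvm::ARM::D17); // 17
)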
+
+namespace ARMII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+ /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+    /// relocation containing higher 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+ /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+    /// relocation containing higher 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+} // end namespace ARMII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e4f10f9..2268e59 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,13 +15,13 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -34,15 +34,75 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
: TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfoImpl
+// currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
}
MachineInstr *
@@ -140,7 +200,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (isLoad)
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc), MI->getOperand(0).getReg())
- .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ .addReg(WBReg).addImm(0).addImm(Pred);
else
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc)).addReg(MI->getOperand(1).getReg())
@@ -151,7 +211,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (isLoad)
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc), MI->getOperand(0).getReg())
- .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ .addReg(BaseReg).addImm(0).addImm(Pred);
else
MemMI = BuildMI(MF, MI->getDebugLoc(),
get(MemOpc)).addReg(MI->getOperand(1).getReg())
@@ -166,8 +226,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (LV) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
@@ -197,43 +256,6 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMIs[0];
}
-bool
-ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- bool isKill = true;
-
- // Add the callee-saved register as live-in unless it's LR and
- // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
- // then it's already added to the function and entry block live-in sets.
- if (Reg == ARM::LR) {
- MachineFunction &MF = *MBB.getParent();
- if (MF.getFrameInfo()->isReturnAddressTaken() &&
- MF.getRegInfo().isLiveIn(Reg))
- isKill = false;
- }
-
- if (isKill)
- MBB.addLiveIn(Reg);
-
- // Insert the spill to the stack frame. The register is killed at the spill
- //
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- storeRegToStackSlot(MBB, MI, Reg, isKill,
- CSI[i].getFrameIdx(), RC, TRI);
- }
- return true;
-}
-
// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -275,13 +297,31 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+        // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
+ }
+ }
+ }
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
return true;
// If the block ends with a B and a Bcc, handle it.
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
TBB = SecondLastInst->getOperand(0).getMBB();
Cond.push_back(SecondLastInst->getOperand(1));
@@ -468,7 +508,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
-DISABLE_INLINE
+LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -513,6 +553,14 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
case ARMII::SizeSpecial: {
switch (Opc) {
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
case ARM::CONSTPOOL_ENTRY:
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
@@ -533,13 +581,13 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case ARM::BR_JTadd:
case ARM::tBR_JTr:
case ARM::t2BR_JT:
- case ARM::t2TBB:
- case ARM::t2TBH: {
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
// These are jumptable branches, i.e. a branch followed by an inlined
// jumptable. The size is 4 + 4 * number of entries. For TBB, each
     // entry is one byte; for TBH, two bytes each.
- unsigned EntrySize = (Opc == ARM::t2TBB)
- ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = TID.getNumOperands();
MachineOperand JTOP =
MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
@@ -557,7 +605,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// alignment issue.
unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
unsigned NumEntries = getNumJTEntries(JT, JTI);
- if (Opc == ARM::t2TBB && (NumEntries & 1))
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction
// instead.
@@ -573,84 +621,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
-unsigned
-ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::LDR:
- case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::t2LDRi12:
- case ARM::tRestore:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::VLDRD:
- case ARM::VLDRS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
-
- return 0;
-}
-
-unsigned
-ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case ARM::STR:
- case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isReg() &&
- MI->getOperand(3).isImm() &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::t2STRi12:
- case ARM::tSpill:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- case ARM::VSTRD:
- case ARM::VSTRS:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
-
- return 0;
-}
-
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -715,8 +685,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOStore, 0,
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
@@ -728,9 +699,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getID()) {
case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
.addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
break;
case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
@@ -747,17 +718,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case ARM::QPRRegClassID:
case ARM::QPR_VFP2RegClassID:
case ARM::QPR_8RegClassID:
- // FIXME: Neon instructions should support predicates
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -766,18 +735,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q))
- .addFrameIndex(FI).addImm(16);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
- MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
- AddDefaultPred(MIB.addMemOperand(MMO));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI).addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -787,9 +752,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case ARM::QQQQPRRegClassID: {
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -806,6 +770,53 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STRrs:
+ case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::STRi12:
+ case ARM::t2STRi12:
+ case ARM::tSpill:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VST1q64Pseudo:
+ if (MI->getOperand(0).isFI() &&
+ MI->getOperand(2).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ case ARM::VSTMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
@@ -817,8 +828,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOLoad, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
@@ -830,8 +842,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getID()) {
case ARM::GPRRegClassID:
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
break;
case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
@@ -846,31 +858,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case ARM::QPRRegClassID:
case ARM::QPR_VFP2RegClassID:
case ARM::QPR_8RegClassID:
- if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
+ if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
case ARM::QQPRRegClassID:
case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q));
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI).addImm(16)
+ .addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -880,9 +887,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case ARM::QQQQPRRegClassID: {
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -899,6 +905,53 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
}
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tRestore:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -921,7 +974,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
ARMConstantPoolValue *ACPV =
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
- unsigned PCLabelId = AFI->createConstPoolEntryUId();
+ unsigned PCLabelId = AFI->createPICLabelUId();
ARMConstantPoolValue *NewCPV = 0;
// FIXME: The below assumes PIC relocation model and that the function
// is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
@@ -991,12 +1044,18 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
}
bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const {
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
int Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
Opcode == ARM::tLDRpci ||
- Opcode == ARM::tLDRpci_pic) {
+ Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel) {
if (MI1->getOpcode() != Opcode)
return false;
if (MI0->getNumOperands() != MI1->getNumOperands())
@@ -1007,6 +1066,14 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
if (MO0.getOffset() != MO1.getOffset())
return false;
+ if (Opcode == ARM::MOV_ga_dyn ||
+ Opcode == ARM::MOV_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel_ldr ||
+ Opcode == ARM::t2MOV_ga_dyn ||
+ Opcode == ARM::t2MOV_ga_pcrel)
+ // Ignore the PC labels.
+ return MO0.getGlobal() == MO1.getGlobal();
+
const MachineFunction *MF = MI0->getParent()->getParent();
const MachineConstantPool *MCP = MF->getConstantPool();
int CPI0 = MO0.getIndex();
@@ -1018,6 +1085,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
ARMConstantPoolValue *ACPV1 =
static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
return ACPV0->hasSameValue(ACPV1);
+ } else if (Opcode == ARM::PICLDR) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ unsigned Addr0 = MI0->getOperand(1).getReg();
+ unsigned Addr1 = MI1->getOperand(1).getReg();
+ if (Addr0 != Addr1) {
+ if (!MRI ||
+ !TargetRegisterInfo::isVirtualRegister(Addr0) ||
+ !TargetRegisterInfo::isVirtualRegister(Addr1))
+ return false;
+
+ // This assumes SSA form.
+ MachineInstr *Def0 = MRI->getVRegDef(Addr0);
+ MachineInstr *Def1 = MRI->getVRegDef(Addr1);
+      // Check whether the two loaded values, e.g. constantpool entries for a
+      // global address, are the same.
+ if (!produceSameValue(Def0, Def1, MRI))
+ return false;
+ }
+
+ for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
+ // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
+ const MachineOperand &MO0 = MI0->getOperand(i);
+ const MachineOperand &MO1 = MI1->getOperand(i);
+ if (!MO0.isIdenticalTo(MO1))
+ return false;
+ }
+ return true;
}
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
@@ -1040,8 +1138,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
switch (Load1->getMachineOpcode()) {
default:
return false;
- case ARM::LDR:
- case ARM::LDRB:
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
@@ -1059,8 +1157,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
switch (Load2->getMachineOpcode()) {
default:
return false;
- case ARM::LDR:
- case ARM::LDRB:
+ case ARM::LDRi12:
+ case ARM::LDRBi12:
case ARM::LDRD:
case ARM::LDRH:
case ARM::LDRSB:
@@ -1164,22 +1262,37 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
-bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
- if (!NumInstrs)
+bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ float Probability,
+ float Confidence) const {
+ if (!NumCyles)
return false;
- if (Subtarget.getCPUString() == "generic")
- // Generic (and overly aggressive) if-conversion limits for testing.
- return NumInstrs <= 10;
- else if (Subtarget.hasV7Ops())
- return NumInstrs <= 3;
- return NumInstrs <= 2;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * NumCyles;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
}
-
+
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
- MachineBasicBlock &FMBB, unsigned NumF) const {
- return NumT && NumF && NumT <= 2 && NumF <= 2;
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned TCycles, unsigned TExtra,
+ MachineBasicBlock &FMBB,
+ unsigned FCycles, unsigned FExtra,
+ float Probability, float Confidence) const {
+ if (!TCycles || !FCycles)
+ return false;
+
+ // Attempt to estimate the relative costs of predication versus branching.
+ float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
+ UnpredCost += 1.0; // The branch itself
+ UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+
+ return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
}
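(A worked instance of the single-block cost comparison above, with made-up numbers; the misprediction penalty is subtarget-specific and 8 is only an assumed value:

    // NumCyles = 2, ExtraPredCycles = 1, Probability = 0.5, Confidence = 0.8:
    //   UnpredCost = 0.5 * 2 + 1.0 + (1.0 - 0.8) * 8 = 3.6
    //   Predicated cost = 2 + 1 = 3, and 3 < 3.6, so if-conversion is
    //   considered profitable. The two-block variant applies the same idea to
    //   both sides of the diamond.
)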
/// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1292,6 +1405,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
switch (AddrMode) {
+ case ARMII::AddrMode_i12: {
+ ImmIdx = FrameRegIdx + 1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 12;
+ break;
+ }
case ARMII::AddrMode2: {
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
@@ -1342,8 +1461,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- if (isSub)
- ImmedOffset |= 1 << NumBits;
+ // FIXME: When addrmode2 goes away, this will simplify (like the
+ // T2 version), as the LDR.i12 versions don't need the encoding
+ // tricks for the offset value.
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
ImmOp.ChangeToImmediate(ImmedOffset);
Offset = 0;
return true;
@@ -1351,8 +1477,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, it didn't fit. Pull in what we can to simplify the immed.
ImmedOffset = ImmedOffset & Mask;
- if (isSub)
- ImmedOffset |= 1 << NumBits;
+ if (isSub) {
+ if (AddrMode == ARMII::AddrMode_i12)
+ ImmedOffset = -ImmedOffset;
+ else
+ ImmedOffset |= 1 << NumBits;
+ }
ImmOp.ChangeToImmediate(ImmedOffset);
Offset &= ~(Mask*Scale);
}
@@ -1363,25 +1493,88 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
bool ARMBaseInstrInfo::
-AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
+ int &CmpValue) const {
switch (MI->getOpcode()) {
default: break;
case ARM::CMPri:
- case ARM::CMPzri:
case ARM::t2CMPri:
- case ARM::t2CMPzri:
SrcReg = MI->getOperand(0).getReg();
+ CmpMask = ~0;
CmpValue = MI->getOperand(1).getImm();
return true;
+ case ARM::TSTri:
+ case ARM::t2TSTri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpMask = MI->getOperand(1).getImm();
+ CmpValue = 0;
+ return true;
}
return false;
}
-/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
-/// that we can remove a "comparison with zero".
+/// isSuitableForMask - Identify a suitable 'and' instruction that
+/// operates on the given source register and applies the same mask
+/// as a 'tst' instruction. Provide a limited look-through for copies.
+/// When successful, MI will hold the found instruction.
+static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
+ int CmpMask, bool CommonUse) {
+ switch (MI->getOpcode()) {
+ case ARM::ANDri:
+ case ARM::t2ANDri:
+ if (CmpMask != MI->getOperand(2).getImm())
+ return false;
+ if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
+ return true;
+ break;
+ case ARM::COPY: {
+ // Walk down one instruction which is potentially an 'and'.
+ const MachineInstr &Copy = *MI;
+ MachineBasicBlock::iterator AND(
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ if (AND == MI->getParent()->end()) return false;
+ MI = AND;
+ return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
+ CmpMask, true);
+ }
+ }
+
+ return false;
+}
+
+/// OptimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
bool ARMBaseInstrInfo::
-ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+ if (CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ MachineInstr *MI = &*DI;
+
+ // Masked compares sometimes use the same register as the corresponding 'and'.
+ if (CmpMask != ~0) {
+ if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
+ MI = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ if (UI->getParent() != CmpInstr->getParent()) continue;
+ MachineInstr *PotentialAND = &*UI;
+ if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
+ continue;
+ MI = PotentialAND;
+ break;
+ }
+ if (!MI) return false;
+ }
+ }
+
// Conservatively refuse to convert an instruction which isn't in the same BB
// as the comparison.
if (MI->getParent() != CmpInstr->getParent())
@@ -1391,16 +1584,20 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
// want to change.
MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
B = MI->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B) return false;
+
--I;
for (; I != E; --I) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
- if (!MO.isReg() || !MO.isDef()) continue;
+ if (!MO.isReg()) continue;
- // This instruction modifies CPSR before the one we want to change. We
- // can't do this transformation.
+ // This instruction modifies or uses CPSR after the one we want to
+ // change. We can't do this transformation.
if (MO.getReg() == ARM::CPSR)
return false;
}
@@ -1414,15 +1611,713 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
switch (MI->getOpcode()) {
default: break;
case ARM::ADDri:
+ case ARM::ANDri:
+ case ARM::t2ANDri:
case ARM::SUBri:
case ARM::t2ADDri:
case ARM::t2SUBri:
- MI->RemoveOperand(5);
- MachineInstrBuilder(MI)
- .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ // Toggle the optional operand to CPSR.
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
CmpInstr->eraseFromParent();
return true;
}
return false;
}
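(A before/after sketch of the compare peephole above, illustrative MIR-style pseudo-code rather than actual pass output:

    // Before:                              After:
    //   %r1 = SUBri %r0, 5, pred, %noreg     %r1 = SUBri %r0, 5, pred, %CPSR<def>
    //   CMPri %r1, 0                         ; CMPri deleted
    //   Bcc <bb>, ne, %CPSR                  Bcc <bb>, ne, %CPSR
)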
+
+bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
+ MachineInstr *DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ // Fold large immediates into add, sub, or, xor.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ // Could be t2MOVi32imm <ga:xx>
+ return false;
+
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ unsigned UseOpc = UseMI->getOpcode();
+ unsigned NewUseOpc = 0;
+ uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
+ uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
+ bool Commute = false;
+ switch (UseOpc) {
+ default: return false;
+ case ARM::SUBrr:
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr:
+ case ARM::t2SUBrr:
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ Commute = UseMI->getOperand(2).getReg() != Reg;
+ switch (UseOpc) {
+ default: break;
+ case ARM::SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::SUBri;
+ // Fallthrough
+ }
+ case ARM::ADDrr:
+ case ARM::ORRrr:
+ case ARM::EORrr: {
+ if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
+ case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
+ case ARM::EORrr: NewUseOpc = ARM::EORri; break;
+ }
+ break;
+ }
+ case ARM::t2SUBrr: {
+ if (Commute)
+ return false;
+ ImmVal = -ImmVal;
+ NewUseOpc = ARM::t2SUBri;
+ // Fallthrough
+ }
+ case ARM::t2ADDrr:
+ case ARM::t2ORRrr:
+ case ARM::t2EORrr: {
+ if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
+ return false;
+ SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
+ SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
+ switch (UseOpc) {
+ default: break;
+ case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
+ case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
+ case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ unsigned OpIdx = Commute ? 2 : 1;
+ unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
+ bool isKill = UseMI->getOperand(OpIdx).isKill();
+ unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+ AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
+ *UseMI, UseMI->getDebugLoc(),
+ get(NewUseOpc), NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)));
+ UseMI->setDesc(get(NewUseOpc));
+ UseMI->getOperand(1).setReg(NewReg);
+ UseMI->getOperand(1).setIsKill();
+ UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
+ DefMI->eraseFromParent();
+ return true;
+}
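(An illustrative instance of the immediate folding above; the register names and the constant are made up, and the two halves come from getSOImmTwoPartFirst/Second in whichever order that helper returns them:

    // Before:                           After:
    //   %v1 = MOVi32imm 0x00FF00FF        %vT = ADDri %v0, <part1>
    //   %v2 = ADDrr %v0, %v1              %v2 = ADDri %vT, <part2>
    //
    // 0x00FF00FF is not a single rotated 8-bit immediate, but it is the sum of
    // the two so_imm values 0x00FF0000 and 0x000000FF, so the def and its only
    // use collapse into two ADDri instructions and the MOVi32imm is erased.
)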
+
+unsigned
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned Class = Desc.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unexpected multi-uops instruction!");
+ break;
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB:
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB:
+ return 2;
+
+  // The number of uOps for a load / store multiple is determined by the
+  // number of registers.
+ //
+ // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+ // same cycle. The scheduling for the first load / store must be done
+  // separately by assuming the address is not 64-bit aligned.
+ //
+ // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+ // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+ // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+ return (NumRegs / 2) + (NumRegs % 2) + 1;
+ }
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
+ unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+ if (Subtarget.isCortexA8()) {
+ if (NumRegs < 4)
+ return 2;
+ // 4 registers would be issued: 2, 2.
+ // 5 registers would be issued: 2, 2, 1.
+ UOps = (NumRegs / 2);
+ if (NumRegs % 2)
+ ++UOps;
+ return UOps;
+ } else if (Subtarget.isCortexA9()) {
+ UOps = (NumRegs / 2);
+      // If there is an odd number of registers or the address is not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((NumRegs % 2) ||
+ !MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 8)
+ ++UOps;
+ return UOps;
+ } else {
+ // Assume the worst.
+ return NumRegs;
+ }
+ }
+ }
+}
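As a quick illustration of the per-core formulas above, the following standalone sketch (not part of the patch) computes the LDM/STM micro-op counts; "Aligned" stands in for the 64-bit alignment check the real code performs through the instruction's memory operand.

// Sketch of the Cortex-A8 / Cortex-A9 LDM/STM uop formulas.
#include <cstdio>

static unsigned ldmUOpsA8(unsigned NumRegs) {
  if (NumRegs < 4)
    return 2;
  return NumRegs / 2 + (NumRegs % 2); // register pairs, plus one for a leftover reg
}

static unsigned ldmUOpsA9(unsigned NumRegs, bool Aligned) {
  unsigned UOps = NumRegs / 2;
  if ((NumRegs % 2) || !Aligned)
    ++UOps;                           // odd count or unaligned access costs an extra AGU cycle
  return UOps;
}

int main() {
  printf("A8, 5 regs: %u uops\n", ldmUOpsA8(5));                   // 3 (issued 2, 2, 1)
  printf("A9, 4 regs, unaligned: %u uops\n", ldmUOpsA9(4, false)); // 3
  return 0;
}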
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ DefCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++DefCycle;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = RegNo;
+ bool isSLoad = false;
+
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ isSLoad = true;
+ break;
+ }
+
+    // If there is an odd number of 'S' registers or the address is not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+ ++DefCycle;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const {
+ int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ // Def is the address writeback.
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+
+ int DefCycle;
+ if (Subtarget.isCortexA8()) {
+ // 4 registers would be issued: 1, 2, 1.
+ // 5 registers would be issued: 1, 2, 2.
+ DefCycle = RegNo / 2;
+ if (DefCycle < 1)
+ DefCycle = 1;
+ // Result latency is issue cycle + 2: E2.
+ DefCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ DefCycle = (RegNo / 2);
+    // If there is an odd number of registers or the address is not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || DefAlign < 8)
+ ++DefCycle;
+ // Result latency is AGU cycles + 2.
+ DefCycle += 2;
+ } else {
+ // Assume the worst.
+ DefCycle = RegNo + 2;
+ }
+
+ return DefCycle;
+}
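The def-cycle formulas above read as "issue slot of the loaded register, plus a fixed two-cycle result delay". A compile-only sketch (not part of the patch) with a couple of worked values:

// Result latency of the RegNo-th register loaded by an LDM, per the
// Cortex-A8 / Cortex-A9 formulas above.
static int ldmDefCycleA8(int RegNo) {
  int Cycle = RegNo / 2;
  if (Cycle < 1)
    Cycle = 1;
  return Cycle + 2;                // result available in E2
}

static int ldmDefCycleA9(int RegNo, bool Aligned) {
  int Cycle = RegNo / 2;
  if ((RegNo % 2) || !Aligned)
    ++Cycle;                       // extra AGU cycle
  return Cycle + 2;
}
// e.g. ldmDefCycleA8(5) == 4 and ldmDefCycleA9(5, /*Aligned=*/false) == 5.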
+
+int
+ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ // (regno / 2) + (regno % 2) + 1
+ UseCycle = RegNo / 2 + 1;
+ if (RegNo % 2)
+ ++UseCycle;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = RegNo;
+ bool isSStore = false;
+
+ switch (UseTID.getOpcode()) {
+ default: break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ isSStore = true;
+ break;
+ }
+
+    // If there is an odd number of 'S' registers or the address is not 64-bit aligned,
+ // then it takes an extra cycle.
+ if ((isSStore && (RegNo % 2)) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = RegNo + 2;
+ }
+
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const {
+ int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ if (RegNo <= 0)
+ return ItinData->getOperandCycle(UseClass, UseIdx);
+
+ int UseCycle;
+ if (Subtarget.isCortexA8()) {
+ UseCycle = RegNo / 2;
+ if (UseCycle < 2)
+ UseCycle = 2;
+ // Read in E3.
+ UseCycle += 2;
+ } else if (Subtarget.isCortexA9()) {
+ UseCycle = (RegNo / 2);
+    // If there is an odd number of registers or the address is not 64-bit aligned,
+ // then it takes an extra AGU (Address Generation Unit) cycle.
+ if ((RegNo % 2) || UseAlign < 8)
+ ++UseCycle;
+ } else {
+ // Assume the worst.
+ UseCycle = 1;
+ }
+ return UseCycle;
+}
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const {
+ unsigned DefClass = DefTID.getSchedClass();
+ unsigned UseClass = UseTID.getSchedClass();
+
+ if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+
+  // This may be a def / use of a variable_ops instruction; the operand
+ // latency might be determinable dynamically. Let the target try to
+ // figure it out.
+ int DefCycle = -1;
+ bool LdmBypass = false;
+ switch (DefTID.getOpcode()) {
+ default:
+ DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ break;
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tPUSH:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ LdmBypass = 1;
+ DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ break;
+ }
+
+ if (DefCycle == -1)
+ // We can't seem to determine the result latency of the def, assume it's 2.
+ DefCycle = 2;
+
+ int UseCycle = -1;
+ switch (UseTID.getOpcode()) {
+ default:
+ UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
+ break;
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+ case ARM::tSTMIA:
+ case ARM::tSTMIA_UPD:
+ case ARM::tPOP_RET:
+ case ARM::tPOP:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ break;
+ }
+
+ if (UseCycle == -1)
+ // Assume it's read in the first stage.
+ UseCycle = 1;
+
+ UseCycle = DefCycle - UseCycle + 1;
+ if (UseCycle > 0) {
+ if (LdmBypass) {
+      // It's a variable_ops instruction so we can't use DefIdx here. Just use
+      // the first def operand.
+ if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+ UseClass, UseIdx))
+ --UseCycle;
+ } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
+ UseClass, UseIdx)) {
+ --UseCycle;
+ }
+ }
+
+ return UseCycle;
+}
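The latency computed above is essentially "cycle in which the def becomes available, minus the stage in which the use reads it, plus one", with one cycle shaved off when the itinerary advertises a forwarding path. A compile-only sketch (not part of the patch) of that combination, where HasForwarding stands in for the hasPipelineForwarding() query:

static int combineOperandLatency(int DefCycle, int UseCycle, bool HasForwarding) {
  int Latency = DefCycle - UseCycle + 1;
  if (Latency > 0 && HasForwarding)
    --Latency;                     // a bypass saves one cycle
  return Latency;
}
// e.g. a def ready in cycle 4 feeding a use read in stage 1:
//   combineOperandLatency(4, 1, false) == 4
//   combineOperandLatency(4, 1, true)  == 3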
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+
+ const TargetInstrDesc &DefTID = DefMI->getDesc();
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ if (DefMO.getReg() == ARM::CPSR) {
+ if (DefMI->getOpcode() == ARM::FMSTAT) {
+ // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
+ return Subtarget.isCortexA9() ? 1 : 20;
+ }
+
+ // CPSR set and branch can be paired in the same cycle.
+ if (UseTID.isBranch())
+ return 0;
+ }
+
+ unsigned DefAlign = DefMI->hasOneMemOperand()
+ ? (*DefMI->memoperands_begin())->getAlignment() : 0;
+ unsigned UseAlign = UseMI->hasOneMemOperand()
+ ? (*UseMI->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal = DefMI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt = DefMI->getOperand(3).getImm();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ return Latency;
+}
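The shifter-op adjustment above encodes a simple rule: on A8/A9, a register-offset load whose offset is unshifted, or shifted left by exactly 2, is one cycle cheaper than the general shifted-register form. A compile-only sketch (not part of the patch) of that adjustment:

static int adjustLoadLatency(int Latency, unsigned ShImm, bool IsLsl) {
  if (Latency > 1 && (ShImm == 0 || (ShImm == 2 && IsLsl)))
    --Latency;                     // unshifted or lsl #2 offsets are one cycle cheaper
  return Latency;
}
// e.g. ldr r0, [r1, r2]           -> adjustLoadLatency(3, 0, false) == 2
//      ldr r0, [r1, r2, lsl #2]   -> adjustLoadLatency(3, 2, true)  == 2
//      ldr r0, [r1, r2, lsl #3]   -> adjustLoadLatency(3, 3, true)  == 3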
+
+int
+ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!DefNode->isMachineOpcode())
+ return 1;
+
+ const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+
+ if (isZeroCost(DefTID.Opcode))
+ return 0;
+
+ if (!ItinData || ItinData->isEmpty())
+ return DefTID.mayLoad() ? 3 : 1;
+
+ if (!UseNode->isMachineOpcode()) {
+ int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+ if (Subtarget.isCortexA9())
+ return Latency <= 2 ? 1 : Latency - 1;
+ else
+ return Latency <= 3 ? 1 : Latency - 2;
+ }
+
+ const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+ const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
+ unsigned DefAlign = !DefMN->memoperands_empty()
+ ? (*DefMN->memoperands_begin())->getAlignment() : 0;
+ const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
+ unsigned UseAlign = !UseMN->memoperands_empty()
+ ? (*UseMN->memoperands_begin())->getAlignment() : 0;
+ int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
+ UseTID, UseIdx, UseAlign);
+
+ if (Latency > 1 &&
+ (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
+ // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
+ // variants are one cycle cheaper.
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::LDRBrs: {
+ unsigned ShOpVal =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
+ if (ShImm == 0 ||
+ (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
+ --Latency;
+ break;
+ }
+ case ARM::t2LDRs:
+ case ARM::t2LDRBs:
+ case ARM::t2LDRHs:
+ case ARM::t2LDRSHs: {
+ // Thumb2 mode: lsl only.
+ unsigned ShAmt =
+ cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
+ if (ShAmt == 0 || ShAmt == 2)
+ --Latency;
+ break;
+ }
+ }
+ }
+
+ return Latency;
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Class = TID.getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ // When predicated, CPSR is an additional source operand for CPSR updating
+    // instructions; this apparently increases their latencies.
+ *PredCost = 1;
+ if (UOps)
+ return ItinData->getStageLatency(Class);
+ return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const {
+ if (!Node->isMachineOpcode())
+ return 1;
+
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Opcode = Node->getMachineOpcode();
+ switch (Opcode) {
+ default:
+ return ItinData->getStageLatency(get(Opcode).getSchedClass());
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB:
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB:
+ return 2;
+ }
+}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+    // Cortex-A8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
+
+bool ARMBaseInstrInfo::
+hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (DDomain == ARMII::DomainGeneral) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 2);
+ }
+ return false;
+}
+
+bool
+ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc,
+ bool &NegAcc, bool &HasLane) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
+ if (I == MLxEntryMap.end())
+ return false;
+
+ const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
+ MulOpc = Entry.MulOpc;
+ AddSubOpc = Entry.AddSubOpc;
+ NegAcc = Entry.NegAcc;
+ HasLane = Entry.HasLane;
+ return true;
+}
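isFpMLxInstruction is just a lookup into a table keyed by opcode. The following standalone analogue (not part of the patch) uses standard containers in place of DenseMap/SmallSet, and the opcode numbers are placeholders rather than real ARM enum values.

#include <unordered_map>
#include <cstdio>

struct MLxEntry { unsigned MulOpc, AddSubOpc; bool NegAcc, HasLane; };

static const std::unordered_map<unsigned, MLxEntry> &mlxTable() {
  static const std::unordered_map<unsigned, MLxEntry> Table = {
    {1001, {2001, 3001, /*NegAcc=*/false, /*HasLane=*/false}}, // "VMLA"-like placeholder
    {1002, {2001, 3002, /*NegAcc=*/true,  /*HasLane=*/false}}, // "VMLS"-like placeholder
  };
  return Table;
}

static bool isFpMLx(unsigned Opc, MLxEntry &E) {
  auto It = mlxTable().find(Opc);
  if (It == mlxTable().end())
    return false;
  E = It->second;
  return true;
}

int main() {
  MLxEntry E;
  if (isFpMLx(1002, E))
    printf("expand to mul %u then addsub %u (NegAcc=%d)\n",
           E.MulOpc, E.AddSubOpc, E.NegAcc);
  return 0;
}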
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b4f4a33..1fb8872 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -17,6 +17,8 @@
#include "ARM.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
namespace llvm {
class ARMSubtarget;
@@ -33,7 +35,7 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// This four-bit field describes the addressing mode used.
- AddrModeMask = 0xf,
+ AddrModeMask = 0x1f,
AddrModeNone = 0,
AddrMode1 = 1,
AddrMode2 = 2,
@@ -50,9 +52,10 @@ namespace ARMII {
AddrModeT2_so = 13,
AddrModeT2_pc = 14, // +/- i12 for pc relative data
AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16,
// Size* - Flags to keep track of the size of an instruction.
- SizeShift = 4,
+ SizeShift = 5,
SizeMask = 7 << SizeShift,
SizeSpecial = 1, // 0 byte pseudo or special case.
Size8Bytes = 2,
@@ -61,7 +64,7 @@ namespace ARMII {
// IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
// and store ops only. Generic "updating" flag is used for ld/st multiple.
- IndexModeShift = 7,
+ IndexModeShift = 8,
IndexModeMask = 3 << IndexModeShift,
IndexModePre = 1,
IndexModePost = 2,
@@ -70,7 +73,7 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// Instruction encoding formats.
//
- FormShift = 9,
+ FormShift = 10,
FormMask = 0x3f << FormShift,
// Pseudo instructions
@@ -143,15 +146,15 @@ namespace ARMII {
// UnaryDP - Indicates this is a unary data processing instruction, i.e.
// it doesn't have a Rn operand.
- UnaryDP = 1 << 15,
+ UnaryDP = 1 << 16,
// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
// a 16-bit Thumb instruction if certain conditions are met.
- Xform16Bit = 1 << 16,
+ Xform16Bit = 1 << 17,
//===------------------------------------------------------------------===//
// Code domain.
- DomainShift = 17,
+ DomainShift = 18,
DomainMask = 3 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
@@ -160,6 +163,11 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set fields while generating
// machine instructions.
+ //
+ // FIXME: This list will need adjusting/fixing as the MC code emitter
+ // takes shape and the ARMCodeEmitter.cpp bits go away.
+ ShiftTypeShift = 4,
+
M_BitShift = 5,
ShiftImmShift = 5,
ShiftShift = 7,
@@ -181,29 +189,15 @@ namespace ARMII {
I_BitShift = 25,
CondShift = 28
};
-
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // ARM Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_LO16 - On a symbol operand, this represents a relocation containing
- /// lower 16 bit of the address. Used only via movw instruction.
- MO_LO16,
-
- /// MO_HI16 - On a symbol operand, this represents a relocation containing
- /// higher 16 bit of the address. Used only via movt instruction.
- MO_HI16
- };
}
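Because AddrModeMask grows from 4 to 5 bits, every later TSFlags field moves up by one position, which is why SizeShift, IndexModeShift, FormShift, UnaryDP, Xform16Bit and DomainShift all change in lockstep in this hunk. A compile-only sketch (not part of the patch) of how the new layout is decoded; the real code uses the ARMII mask constants rather than literals, and the getters below return the field values shifted down for readability.

static unsigned getAddrMode(unsigned TSFlags)  { return TSFlags & 0x1f; }         // AddrModeMask = 0x1f
static unsigned getSizeField(unsigned TSFlags) { return (TSFlags >> 5) & 0x7; }   // SizeShift = 5
static unsigned getIndexMode(unsigned TSFlags) { return (TSFlags >> 8) & 0x3; }   // IndexModeShift = 8
static unsigned getForm(unsigned TSFlags)      { return (TSFlags >> 10) & 0x3f; } // FormShift = 10
static unsigned getDomain(unsigned TSFlags)    { return (TSFlags >> 18) & 0x3; }  // DomainShift = 18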
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
const ARMSubtarget &Subtarget;
+
protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
+
public:
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
@@ -216,10 +210,13 @@ public:
virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
// Branch analysis.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -301,7 +298,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const;
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const;
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
@@ -328,26 +326,117 @@ public:
const MachineFunction &MF) const;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs) const;
+ unsigned NumCyles, unsigned ExtraPredCycles,
+ float Prob, float Confidence) const;
- virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
- MachineBasicBlock &FMBB,unsigned NumF) const;
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ float Probability, float Confidence) const;
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs) const {
- return NumInstrs && NumInstrs == 1;
+ unsigned NumCyles,
+ float Probability,
+ float Confidence) const {
+ return NumCyles == 1;
}
/// AnalyzeCompare - For a comparison instruction, return the source register
/// in SrcReg and the value it compares against in CmpValue. Return true if
/// the comparison instruction can be analyzed.
virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- int &CmpValue) const;
+ int &CmpMask, int &CmpValue) const;
- /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// OptimizeCompareInstr - Convert the instruction to set the zero flag so
/// that we can remove a "comparison with zero".
- virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
- MachineInstr *CmpInstr) const;
+ virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const;
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const;
+
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ virtual
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+private:
+ int getVLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getLDMDefCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefClass,
+ unsigned DefIdx, unsigned DefAlign) const;
+ int getVSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getSTMUseCycle(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &UseTID,
+ unsigned UseClass,
+ unsigned UseIdx, unsigned UseAlign) const;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const TargetInstrDesc &DefTID,
+ unsigned DefIdx, unsigned DefAlign,
+ const TargetInstrDesc &UseTID,
+ unsigned UseIdx, unsigned UseAlign) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+ /// Modeling special VFP / NEON fp MLA / MLS hazards.
+
+ /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal
+ /// MLx table.
+ DenseMap<unsigned, unsigned> MLxEntryMap;
+
+ /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause
+ /// stalls when scheduled together with fp MLA / MLS opcodes.
+ SmallSet<unsigned, 16> MLxHazardOpcodes;
+
+public:
+  /// isFpMLxInstruction - Return true if the specified opcode is an fp MLA / MLS
+ /// instruction.
+ bool isFpMLxInstruction(unsigned Opcode) const {
+ return MLxEntryMap.count(Opcode);
+ }
+
+ /// isFpMLxInstruction - This version also returns the multiply opcode and the
+  /// addition / subtraction opcode to expand to. Sets 'HasLane' to true for
+  /// MLx instructions with an extra lane operand.
+ bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
+ unsigned &AddSubOpc, bool &NegAcc,
+ bool &HasLane) const;
+
+ /// canCauseFpMLxStall - Return true if an instruction of the specified opcode
+  /// will cause stalls when scheduled after (within a 4-cycle window) an fp
+ /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const {
+ return MLxHazardOpcodes.count(Opcode);
+ }
};
static inline
@@ -389,7 +478,7 @@ bool isJumpTableBranchOpcode(int Opc) {
static inline
bool isIndirectBranchOpcode(int Opc) {
- return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
+ return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -413,6 +502,12 @@ void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII);
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ DebugLoc dl);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index eceafad..67a4b7d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -32,120 +33,25 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
-namespace llvm {
+using namespace llvm;
+
static cl::opt<bool>
ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
cl::desc("Force use of virtual base registers for stack load/store"));
static cl::opt<bool>
EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
cl::desc("Enable pre-regalloc stack frame index allocation"));
-}
-
-using namespace llvm;
-
static cl::opt<bool>
EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
- bool *isSPVFP) {
- if (isSPVFP)
- *isSPVFP = false;
-
- using namespace ARM;
- switch (RegEnum) {
- default:
- llvm_unreachable("Unknown ARM register!");
- case R0: case D0: case Q0: return 0;
- case R1: case D1: case Q1: return 1;
- case R2: case D2: case Q2: return 2;
- case R3: case D3: case Q3: return 3;
- case R4: case D4: case Q4: return 4;
- case R5: case D5: case Q5: return 5;
- case R6: case D6: case Q6: return 6;
- case R7: case D7: case Q7: return 7;
- case R8: case D8: case Q8: return 8;
- case R9: case D9: case Q9: return 9;
- case R10: case D10: case Q10: return 10;
- case R11: case D11: case Q11: return 11;
- case R12: case D12: case Q12: return 12;
- case SP: case D13: case Q13: return 13;
- case LR: case D14: case Q14: return 14;
- case PC: case D15: case Q15: return 15;
-
- case D16: return 16;
- case D17: return 17;
- case D18: return 18;
- case D19: return 19;
- case D20: return 20;
- case D21: return 21;
- case D22: return 22;
- case D23: return 23;
- case D24: return 24;
- case D25: return 25;
- case D26: return 26;
- case D27: return 27;
- case D28: return 28;
- case D29: return 29;
- case D30: return 30;
- case D31: return 31;
-
- case S0: case S1: case S2: case S3:
- case S4: case S5: case S6: case S7:
- case S8: case S9: case S10: case S11:
- case S12: case S13: case S14: case S15:
- case S16: case S17: case S18: case S19:
- case S20: case S21: case S22: case S23:
- case S24: case S25: case S26: case S27:
- case S28: case S29: case S30: case S31: {
- if (isSPVFP)
- *isSPVFP = true;
- switch (RegEnum) {
- default: return 0; // Avoid compile time warning.
- case S0: return 0;
- case S1: return 1;
- case S2: return 2;
- case S3: return 3;
- case S4: return 4;
- case S5: return 5;
- case S6: return 6;
- case S7: return 7;
- case S8: return 8;
- case S9: return 9;
- case S10: return 10;
- case S11: return 11;
- case S12: return 12;
- case S13: return 13;
- case S14: return 14;
- case S15: return 15;
- case S16: return 16;
- case S17: return 17;
- case S18: return 18;
- case S19: return 19;
- case S20: return 20;
- case S21: return 21;
- case S22: return 22;
- case S23: return 23;
- case S24: return 24;
- case S25: return 25;
- case S26: return 26;
- case S27: return 27;
- case S28: return 28;
- case S29: return 29;
- case S30: return 30;
- case S31: return 31;
- }
- }
- }
-}
-
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
@@ -180,12 +86,14 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
- if (hasFP(MF))
+ if (TFI->hasFP(MF))
Reserved.set(FramePtr);
if (hasBasePointer(MF))
Reserved.set(BasePtr);
@@ -197,6 +105,8 @@ getReservedRegs(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
switch (Reg) {
default: break;
case ARM::SP:
@@ -208,7 +118,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
break;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && hasFP(MF))
+ if (FramePtr == Reg && TFI->hasFP(MF))
return true;
break;
case ARM::R9:
@@ -444,6 +354,7 @@ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
// Alternative register allocation orders when favoring even / odd registers
// of register pairs.
@@ -525,7 +436,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!hasFP(MF)) {
+ if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -554,7 +465,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!hasFP(MF)) {
+ if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -606,7 +517,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
Hint.first == (unsigned)ARMRI::RegPairEven) &&
- Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+ TargetRegisterInfo::isVirtualRegister(Hint.second)) {
// If 'Reg' is one of the even / odd register pair and it's now changed
// (e.g. coalesced) into a different register, the other register of the
// pair's allocation hint must be updated to reflect the relationship
@@ -619,23 +530,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-///
-bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetDarwin())
- return true;
-
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
- needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -681,7 +575,7 @@ bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
@@ -697,417 +591,19 @@ cannotEliminateFrame(const MachineFunction &MF) const {
|| needsStackRealignment(MF);
}
-/// estimateStackSize - Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
- }
- return (unsigned)Offset;
-}
-
-/// estimateRSStackSizeLimit - Look at each instruction that references stack
-/// frames and return the stack size limit beyond which some of these
-/// instructions will require a scratch register during their expansion later.
-unsigned
-ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Limit = (1 << 12) - 1;
- for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!I->getOperand(i).isFI()) continue;
-
- // When using ADDri to get the address of a stack object, 255 is the
- // largest offset guaranteed to fit in the immediate offset.
- if (I->getOpcode() == ARM::ADDri) {
- Limit = std::min(Limit, (1U << 8) - 1);
- break;
- }
-
- // Otherwise check the addressing mode.
- switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
- case ARMII::AddrMode3:
- case ARMII::AddrModeT2_i8:
- Limit = std::min(Limit, (1U << 8) - 1);
- break;
- case ARMII::AddrMode5:
- case ARMII::AddrModeT2_i8s4:
- Limit = std::min(Limit, ((1U << 8) - 1) * 4);
- break;
- case ARMII::AddrModeT2_i12:
- // i12 supports only positive offset so these will be converted to
- // i8 opcodes. See llvm::rewriteT2FrameIndex.
- if (hasFP(MF) && AFI->hasStackFrame())
- Limit = std::min(Limit, (1U << 8) - 1);
- break;
- case ARMII::AddrMode6:
- // Addressing mode 6 (load/store) instructions can't encode an
- // immediate offset for stack references.
- return 0;
- default:
- break;
- }
- break; // At most one FI per instruction
- }
- }
- }
-
- return Limit;
-}
-
-static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
- const ARMBaseInstrInfo &TII) {
- unsigned FnSize = 0;
- for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
- I != E; ++I)
- FnSize += TII.GetInstSizeInBytes(I);
- }
- return FnSize;
-}
-
-void
-ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // This tells PEI to spill the FP as if it is any other callee-save register
- // to take advantage the eliminateFrameIndex machinery. This also ensures it
- // is spilled in the order specified by getCalleeSavedRegs() to make it easier
- // to combine multiple loads / stores.
- bool CanEliminateFrame = true;
- bool CS1Spilled = false;
- bool LRSpilled = false;
- unsigned NumGPRSpills = 0;
- SmallVector<unsigned, 4> UnspilledCS1GPRs;
- SmallVector<unsigned, 4> UnspilledCS2GPRs;
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Spill R4 if Thumb2 function requires stack realignment - it will be used as
- // scratch register.
- // FIXME: It will be better just to find spare register here.
- if (needsStackRealignment(MF) &&
- AFI->isThumb2Function())
- MF.getRegInfo().setPhysRegUsed(ARM::R4);
-
- // Spill LR if Thumb1 function uses variable length argument lists.
- if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
-
- // Spill the BasePtr if it's used.
- if (hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(BasePtr);
-
- // Don't spill FP if the frame can be eliminated. This is determined
- // by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = getCalleeSavedRegs();
- for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
- AFI->setCSRegisterIsSpilled(Reg);
- Spilled = true;
- CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
- if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
- Spilled = true;
- CanEliminateFrame = false;
- }
- }
- }
-
- if (!ARM::GPRRegisterClass->contains(Reg))
- continue;
-
- if (Spilled) {
- NumGPRSpills++;
-
- if (!STI.isTargetDarwin()) {
- if (Reg == ARM::LR)
- LRSpilled = true;
- CS1Spilled = true;
- continue;
- }
-
- // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
- switch (Reg) {
- case ARM::LR:
- LRSpilled = true;
- // Fallthrough
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- CS1Spilled = true;
- break;
- default:
- break;
- }
- } else {
- if (!STI.isTargetDarwin()) {
- UnspilledCS1GPRs.push_back(Reg);
- continue;
- }
-
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- UnspilledCS1GPRs.push_back(Reg);
- break;
- default:
- UnspilledCS2GPRs.push_back(Reg);
- break;
- }
- }
- }
-
- bool ForceLRSpill = false;
- if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
- // Force LR to be spilled if the Thumb function size is > 2048. This enables
- // use of BL to implement far jump. If it turns out that it's not needed
- // then the branch fix up path will undo it.
- if (FnSize >= (1 << 11)) {
- CanEliminateFrame = false;
- ForceLRSpill = true;
- }
- }
-
- // If any of the stack slot references may be out of range of an immediate
- // offset, make sure a register (or a spill slot) is available for the
- // register scavenger. Note that if we're indexing off the frame pointer, the
- // effective stack size is 4 bytes larger since the FP points to the stack
- // slot of the previous FP. Also, if we have variable sized objects in the
- // function, stack slot references will often be negative, and some of
- // our instructions are positive-offset only, so conservatively consider
- // that case to want a spill slot (or register) as well. Similarly, if
- // the function adjusts the stack pointer during execution and the
- // adjustments aren't already part of our stack size estimate, our offset
- // calculations may be off, so be conservative.
- // FIXME: We could add logic to be more precise about negative offsets
- // and which instructions will need a scratch register for them. Is it
- // worth the effort and added fragility?
- bool BigStack =
- (RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
- estimateRSStackSizeLimit(MF)))
- || MFI->hasVarSizedObjects()
- || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
-
- bool ExtraCSSpill = false;
- if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
- AFI->setHasStackFrame(true);
-
- // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
- // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
- if (!LRSpilled && CS1Spilled) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
- AFI->setCSRegisterIsSpilled(ARM::LR);
- NumGPRSpills++;
- UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
- UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
- ForceLRSpill = false;
- ExtraCSSpill = true;
- }
-
- if (hasFP(MF)) {
- MF.getRegInfo().setPhysRegUsed(FramePtr);
- NumGPRSpills++;
- }
-
- // If stack and double are 8-byte aligned and we are spilling an odd number
- // of GPRs. Spill one extra callee save GPR so we won't have to pad between
- // the integer and double callee save areas.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- if (TargetAlign == 8 && (NumGPRSpills & 1)) {
- if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
- for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
- unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill high register if the function is thumb1
- if (!AFI->isThumb1OnlyFunction() ||
- isARMLowRegister(Reg) || Reg == ARM::LR) {
- MF.getRegInfo().setPhysRegUsed(Reg);
- AFI->setCSRegisterIsSpilled(Reg);
- if (!isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- break;
- }
- }
- } else if (!UnspilledCS2GPRs.empty() &&
- !AFI->isThumb1OnlyFunction()) {
- unsigned Reg = UnspilledCS2GPRs.front();
- MF.getRegInfo().setPhysRegUsed(Reg);
- AFI->setCSRegisterIsSpilled(Reg);
- if (!isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- }
- }
-
- // Estimate if we might need to scavenge a register at some point in order
- // to materialize a stack offset. If so, either spill one additional
- // callee-saved register or reserve a special spill slot to facilitate
- // register scavenging. Thumb1 needs a spill slot for stack pointer
- // adjustments also, even when the frame itself is small.
- if (BigStack && !ExtraCSSpill) {
- // If any non-reserved CS register isn't spilled, just spill one or two
- // extra. That should take care of it!
- unsigned NumExtras = TargetAlign / 4;
- SmallVector<unsigned, 2> Extras;
- while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.back();
- UnspilledCS1GPRs.pop_back();
- if (!isReservedReg(MF, Reg) &&
- (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
- Reg == ARM::LR)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- // For non-Thumb1 functions, also check for hi-reg CS registers
- if (!AFI->isThumb1OnlyFunction()) {
- while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.back();
- UnspilledCS2GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- }
- if (Extras.size() && NumExtras == 0) {
- for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
- AFI->setCSRegisterIsSpilled(Extras[i]);
- }
- } else if (!AFI->isThumb1OnlyFunction()) {
- // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
- // closest to SP or frame pointer.
- const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
- }
- }
-
- if (ForceLRSpill) {
- MF.getRegInfo().setPhysRegUsed(ARM::LR);
- AFI->setCSRegisterIsSpilled(ARM::LR);
- AFI->setLRIsSpilledForFarJump(true);
- }
-}
-
unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (hasFP(MF))
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
return FramePtr;
return ARM::SP;
}
-// Provide a base+offset reference to an FI slot for debug info. It's the
-// same as what we use for resolving the code-gen references for now.
-// FIXME: This can go wrong when references are SP-relative and simple call
-// frames aren't used.
-int
-ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
-}
-
-int
-ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg,
- int SPAdj) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
- bool isFixed = MFI->isFixedObjectIndex(FI);
-
- FrameReg = ARM::SP;
- Offset += SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- return Offset - AFI->getDPRCalleeSavedAreaOffset();
-
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack/base pointer for locals.
- if (needsStackRealignment(MF)) {
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
- if (isFixed) {
- FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
- assert(hasBasePointer(MF) &&
- "VLAs and dynamic stack alignment, but missing base pointer!");
- FrameReg = BasePtr;
- }
- return Offset;
- }
-
- // If there is a frame pointer, use it when we can.
- if (hasFP(MF) && AFI->hasStackFrame()) {
- // Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
- FrameReg = getFrameRegister(MF);
- return FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
- assert(hasBasePointer(MF) && "missing base pointer!");
- // Use the base register since we have it.
- FrameReg = BasePtr;
- } else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited. Try to avoid
- // out of range references.
- if (FPOffset >= -255 && FPOffset < 0) {
- FrameReg = getFrameRegister(MF);
- return FPOffset;
- }
- } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
- // Otherwise, use SP or FP, whichever is closer to the stack slot.
- FrameReg = getFrameRegister(MF);
- return FPOffset;
- }
- }
- // Use the base pointer if we have one.
- if (hasBasePointer(MF))
- FrameReg = BasePtr;
- return Offset;
-}
-
-int
-ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
return 0;
@@ -1320,7 +816,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
.addReg(DestReg, getDefRegState(true), SubIdx)
.addConstantPoolIndex(Idx)
- .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ .addImm(0).addImm(Pred).addReg(PredReg);
}
bool ARMBaseRegisterInfo::
@@ -1338,34 +834,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const {
return EnableLocalStackAlloc;
}
-// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-// not required, we reserve argument space for call sites in the function
-// immediately on entry to the current function. This eliminates the need for
-// add/sub sp brackets around call sites. Returns true if the call frame is
-// included as part of the stack frame.
-bool ARMBaseRegisterInfo::
-hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
- // It's not always a good idea to include the call frame as part of the
- // stack frame. ARM (especially Thumb) has small immediate offset to
- // address the stack frame. So a large call frame can cause poor codegen
- // and may even makes it impossible to scavenge a register.
- if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
- return false;
-
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-// call frame pseudos can be simplified. Unlike most targets, having a FP
-// is not sufficient here since we still may reference some objects via SP
-// even when FP is available in Thumb2 mode.
-bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
-}
-
static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -1384,7 +852,8 @@ emitSPUpdate(bool isARM,
void ARMBaseRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
// ADJCALLSTACKUP -> add, sp, sp, amount
@@ -1395,7 +864,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = TFI->getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
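The expression used just above to re-align the outgoing-argument size is the standard round-up-to-a-multiple idiom. A compile-only sketch (not part of the patch) with worked values:

// Round Amount up to the next multiple of Align.
static unsigned roundUpToAlignment(unsigned Amount, unsigned Align) {
  return (Amount + Align - 1) / Align * Align;
}
// e.g. roundUpToAlignment(20, 8) == 24 and roundUpToAlignment(16, 8) == 16.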
@@ -1433,8 +902,7 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
switch (AddrMode) {
case ARMII::AddrModeT2_i8:
case ARMII::AddrModeT2_i12:
- // i8 supports only negative, and i12 supports only positive, so
- // based on Offset sign, consider the appropriate instruction
+ case ARMII::AddrMode_i12:
InstrOffs = MI->getOperand(Idx+1).getImm();
Scale = 1;
break;
@@ -1496,8 +964,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// return false for everything else.
unsigned Opc = MI->getOpcode();
switch (Opc) {
- case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
- case ARM::STR: case ARM::STRH: case ARM::STRB:
+ case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+ case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
case ARM::t2LDRi12: case ARM::t2LDRi8:
case ARM::t2STRi12: case ARM::t2STRi8:
case ARM::VLDRS: case ARM::VLDRD:
@@ -1516,6 +984,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// Note that the incoming offset is based on the SP value at function entry,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1542,8 +1011,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- if (hasFP(MF) &&
+ unsigned StackAlign = TFI->getStackAlignment();
+ if (TFI->hasFP(MF) &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, FPOffset))
return false;
@@ -1560,19 +1029,25 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
return true;
}
-/// materializeFrameBaseRegister - Insert defining instruction(s) for
-/// BaseReg to be a pointer to FrameIdx before insertion point I.
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
void ARMBaseRegisterInfo::
-materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
- int FrameIdx, int64_t Offset) const {
- ARMFunctionInfo *AFI =
- I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
(AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
MachineInstrBuilder MIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+ BuildMI(*MBB, Ins, DL, TII.get(ADDriOpc), BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
+
if (!AFI->isThumb1OnlyFunction())
AddDefaultCC(AddDefaultPred(MIB));
}
@@ -1640,6 +1115,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
Scale = 4;
break;
+ case ARMII::AddrMode_i12:
case ARMII::AddrMode2:
NumBits = 12;
break;
@@ -1679,6 +1155,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMFrameLowering *TFI =
+ static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
@@ -1691,7 +1169,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned FrameReg;
- int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+ int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
@@ -1737,339 +1215,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
}
+ // Update the original instruction to use the scratch register.
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ if (MI.getOpcode() == ARM::t2ADDrSPi)
+ MI.setDesc(TII.get(ARM::t2ADDri));
+ else if (MI.getOpcode() == ARM::t2SUBrSPi)
+ MI.setDesc(TII.get(ARM::t2SUBri));
}
}
-/// Move iterator past the next bunch of callee save load / store ops for
-/// the particular spill area (1: integer area 1, 2: integer area 2,
-/// 3: fp area, 0: don't care).
-static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- int Opc1, int Opc2, unsigned Area,
- const ARMSubtarget &STI) {
- while (MBBI != MBB.end() &&
- ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) &&
- MBBI->getOperand(1).isFI()) {
- if (Area != 0) {
- bool Done = false;
- unsigned Category = 0;
- switch (MBBI->getOperand(0).getReg()) {
- case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
- case ARM::LR:
- Category = 1;
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- Category = STI.isTargetDarwin() ? 2 : 1;
- break;
- case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
- case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
- Category = 3;
- break;
- default:
- Done = true;
- break;
- }
- if (Done || Category != Area)
- break;
- }
-
- ++MBBI;
- }
-}
-
-void ARMBaseRegisterInfo::
-emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- assert(!AFI->isThumb1OnlyFunction() &&
- "This emitPrologue does not support Thumb1!");
- bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Determine the sizes of each callee-save spill areas and record which frame
- // belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
- int FramePtrSpillFI = 0;
-
- // Allocate the vararg register save area. This is not counted in NumBytes.
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
- return;
- }
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- break;
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
- GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- }
- break;
- default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
- }
- }
-
- // Build the new SUBri to adjust SP for integer callee-save spill area 1.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size);
- movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI);
-
- // Set FP to point to the stack slot that contains the previous FP.
- // For Darwin, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not Darwin, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it is
- // now safe to emit this assignment.
- bool HasFP = hasFP(MF);
- if (HasFP) {
- unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
- AddDefaultCC(AddDefaultPred(MIB));
- }
-
- // Build the new SUBri to adjust SP for integer callee-save spill area 2.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size);
-
- // Build the new SUBri to adjust SP for FP callee-save spill area.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI);
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize);
-
- // Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (HasFP)
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
- NumBytes);
- AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
- AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
- AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
- movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI);
- NumBytes = DPRCSOffset;
- if (NumBytes) {
- // Adjust SP after all the callee-save spills.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
- if (HasFP)
- AFI->setShouldRestoreSPFromFP(true);
- }
-
- if (STI.isTargetELF() && hasFP(MF)) {
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
- AFI->setShouldRestoreSPFromFP(true);
- }
-
- AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
- AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
- AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-
- // If we need dynamic stack realignment, do it here. Be paranoid and make
- // sure if we also have VLAs, we have a base pointer for frame access.
- if (needsStackRealignment(MF)) {
- unsigned MaxAlign = MFI->getMaxAlignment();
- assert (!AFI->isThumb1OnlyFunction());
- if (!AFI->isThumbFunction()) {
- // Emit bic sp, sp, MaxAlign
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::BICri), ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addImm(MaxAlign-1)));
- } else {
- // We cannot use sp as source/dest register here, thus we're emitting the
- // following sequence:
- // mov r4, sp
- // bic r4, r4, MaxAlign
- // mov sp, r4
- // FIXME: It will be better just to find spare register here.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill);
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
- TII.get(ARM::t2BICri), ARM::R4)
- .addReg(ARM::R4, RegState::Kill)
- .addImm(MaxAlign-1)));
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill);
- }
-
- AFI->setShouldRestoreSPFromFP(true);
- }
-
- // If we need a base pointer, set it up here. It's whatever the value
- // of the stack pointer is at this point. Any variable size objects
- // will be allocated after this, so we can still use the base pointer
- // to reference locals.
- if (hasBasePointer(MF)) {
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
- .addReg(ARM::SP)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
- .addReg(ARM::SP);
- }
-
- // If the frame has variable sized objects then the epilogue must restore
- // the sp from fp.
- if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
- AFI->setShouldRestoreSPFromFP(true);
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI,
- const ARMBaseInstrInfo &TII,
- const unsigned *CSRegs) {
- return ((MI->getOpcode() == (int)ARM::VLDRD ||
- MI->getOpcode() == (int)ARM::LDR ||
- MI->getOpcode() == (int)ARM::t2LDRi12) &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
-}
-
-void ARMBaseRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl = MBBI->getDebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- assert(!AFI->isThumb1OnlyFunction() &&
- "This emitEpilogue does not support Thumb1!");
- bool isARM = !AFI->isThumbFunction();
-
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- int NumBytes = (int)MFI->getStackSize();
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
- } else {
- // Unwind MBBI to point to first LDR / VLDRD.
- const unsigned *CSRegs = getCalleeSavedRegs();
- if (MBBI != MBB.begin()) {
- do
- --MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
- if (!isCSRestore(MBBI, TII, CSRegs))
- ++MBBI;
- }
-
- // Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
- AFI->getGPRCalleeSavedArea2Size() +
- AFI->getDPRCalleeSavedAreaSize());
-
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (AFI->shouldRestoreSPFromFP()) {
- NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
- } else if (NumBytes)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
-
- // Move SP to start of integer callee save spill area 2.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI);
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
-
- // Move SP to start of integer callee save spill area 1.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI);
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size());
-
- // Move SP to SP upon entry to the function.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI);
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
- }
-
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == ARM::TCRETURNdiND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- } else if (RetOpcode == ARM::TCRETURNriND) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = prior(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- }
-
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
-}
-
#include "ARMGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index fa2eb6c..ba6bd2b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,6 +44,45 @@ static inline bool isARMLowRegister(unsigned Reg) {
}
}
+/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
+/// or a stack/pc register that we should push/pop.
+static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ case LR: case SP: case PC:
+ return true;
+ case R8: case R9: case R10: case R11:
+ // For Darwin we want r7 and lr to be next to each other.
+ return !isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case R8: case R9: case R10: case R11:
+ // Darwin has this second area.
+ return isDarwin;
+ default:
+ return false;
+ }
+}
+
+static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+ using namespace ARM;
+ switch (Reg) {
+ case D15: case D14: case D13: case D12:
+ case D11: case D10: case D9: case D8:
+ return true;
+ default:
+ return false;
+ }
+}
+
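As an aside, a minimal sketch (not part of this patch) of how these predicates can partition a null-terminated callee-saved register list into the three push/pop areas. The helper name partitionCSRegs is hypothetical; the null-terminated CSRegs convention matches what this backend already uses for getCalleeSavedRegs().

#include <vector>

// Illustrative only: split a callee-saved register list into the three spill
// areas using the predicates above. Register values are the ARM::* enumerators;
// isDarwin selects the Darwin (r7 frame pointer) layout.
static void partitionCSRegs(const unsigned *CSRegs, bool isDarwin,
                            std::vector<unsigned> &Area1,
                            std::vector<unsigned> &Area2,
                            std::vector<unsigned> &Area3) {
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (isARMArea1Register(Reg, isDarwin))
      Area1.push_back(Reg);
    else if (isARMArea2Register(Reg, isDarwin))
      Area2.push_back(Reg);
    else if (isARMArea3Register(Reg, isDarwin))
      Area3.push_back(Reg);
  }
}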
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMBaseInstrInfo &TII;
@@ -65,12 +104,6 @@ protected:
unsigned getOpcode(int Op) const;
public:
- /// getRegisterNumbering - Given the enum value for some register, e.g.
- /// ARM::LR, return the number that it corresponds to (e.g. 14). It
- /// also returns true in isSPVFP if the register is a single precision
- /// VFP register.
- static unsigned getRegisterNumbering(unsigned RegEnum, bool *isSPVFP = 0);
-
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
@@ -106,14 +139,13 @@ public:
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
- void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const;
void resolveFrameIndex(MachineBasicBlock::iterator I,
@@ -122,17 +154,10 @@ public:
bool cannotEliminateFrame(const MachineFunction &MF) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const;
- int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg, int SPAdj) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ unsigned getBaseRegister() const { return BasePtr; }
// Exception handling queries.
unsigned getEHExceptionRegister() const;
@@ -162,9 +187,6 @@ public:
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
- virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
- virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
-
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -172,12 +194,7 @@ public:
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
private:
- unsigned estimateRSStackSizeLimit(MachineFunction &MF) const;
-
unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
index 3b38375..69eddf0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains enumerations and support routines for ARM build attributes
-// as defined in ARM ABI addenda document (ABI release 2.07).
+// as defined in ARM ABI addenda document (ABI release 2.08).
//
//===----------------------------------------------------------------------===//
@@ -16,7 +16,14 @@
#define __TARGET_ARMBUILDATTRS_H__
namespace ARMBuildAttrs {
- enum {
+ enum SpecialAttr {
+ // This is for the .cpu asm attr. It translates into one or more
+ // AttrType (below) entries in the .ARM.attributes section in the ELF.
+ SEL_CPU
+ };
+
+ enum AttrType {
+ // Rest correspond to ELF/.ARM.attributes
File = 1,
Section = 2,
Symbol = 3,
@@ -52,12 +59,72 @@ namespace ARMBuildAttrs {
CPU_unaligned_access = 34,
VFP_HP_extension = 36,
ABI_FP_16bit_format = 38,
+ MPextension_use = 42, // was 70, 2.08 ABI
+ DIV_use = 44,
nodefaults = 64,
also_compatible_with = 65,
T2EE_use = 66,
conformance = 67,
Virtualization_use = 68,
- MPextension_use = 70
+ MPextension_use_old = 70
+ };
+
+ // Magic numbers for .ARM.attributes
+ enum AttrMagic {
+ Format_Version = 0x41
+ };
+
+ // Legal Values for CPU_arch, (=6), uleb128
+ enum CPUArch {
+ Pre_v4 = 0,
+ v4 = 1, // e.g. SA110
+ v4T = 2, // e.g. ARM7TDMI
+ v5T = 3, // e.g. ARM9TDMI
+ v5TE = 4, // e.g. ARM946E_S
+ v5TEJ = 5, // e.g. ARM926EJ_S
+ v6 = 6, // e.g. ARM1136J_S
+ v6KZ = 7, // e.g. ARM1176JZ_S
+ v6T2 = 8, // e.g. ARM1156T2F_S
+ v6K = 9, // e.g. ARM1136J_S
+ v7 = 10, // e.g. Cortex A8, Cortex M3
+ v6_M = 11, // e.g. Cortex M1
+ v6S_M = 12, // v6_M with the System extensions
+ v7E_M = 13 // v7_M with DSP extensions
+ };
+
+ enum CPUArchProfile { // (=7), uleb128
+ Not_Applicable = 0, // pre v7, or cross-profile code
+ ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
+ RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
+ MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3)
+ SystemProfile = (0x53) // 'S' Application or real-time profile
+ };
+
+ // The following have a lot of common use cases
+ enum {
+ // ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ Not_Allowed = 0,
+ Allowed = 1,
+
+ // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
+ AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv4B = 6, // v4 FP ISA permitted, but only D0-D15, S0-S31
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_WMMX_arch, (=11), uleb128
+ AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v1
+
+ // Tag_ABI_FP_denormal, (=20), uleb128
+ PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+ // Tag_ABI_FP_number_model, (=23), uleb128
+ AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
+ AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
};
}
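For reference, the tag and value fields enumerated above are stored as ULEB128 in the .ARM.attributes section (e.g. CPU_arch = 6 with value v7 = 10). A minimal standalone sketch of that encoding, assuming the standard LEB128 scheme and not LLVM's own emitter:

#include <cstdint>
#include <vector>

// Illustrative only: encode one unsigned value as ULEB128.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;   // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}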
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 0000000..ff7db1f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList, 4))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
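A rough standalone model (not the CCState-based code above) of the APCS rule these handlers implement: an f64 occupies the next two of r0-r3, may be split across the last free register and the stack, or goes entirely to the stack once the registers are exhausted. The function below is hypothetical and only counts register/stack usage:

// Illustrative only: the APCS decision for one f64. NextReg counts how many of
// r0-r3 are already taken; returns true if any part lands in a register.
static bool assignAPCSf64(unsigned &NextReg, unsigned &StackBytes) {
  if (NextReg >= 4) {           // no core registers left: whole value on stack
    StackBytes += 8;
    return false;
  }
  ++NextReg;                    // first half in a register
  if (NextReg >= 4)
    StackBytes += 4;            // second half spills to the stack
  else
    ++NextReg;                  // second half in the next register
  return true;
}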
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
+ if (Reg == 0) {
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 293e32a..426ba13 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -53,6 +53,34 @@ def RetCC_ARM_APCS : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+ // Handle all vector types as either f64 or v2f64.
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
//===----------------------------------------------------------------------===//
@@ -105,6 +133,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
//===----------------------------------------------------------------------===//
def CC_ARM_AAPCS_VFP : CallingConv<[
diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
index b1a702f..9bbf6a0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -74,7 +74,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
bool runOnMachineFunction(MachineFunction &MF);
@@ -101,7 +101,6 @@ namespace {
unsigned OpIdx);
unsigned getMachineSoImmOpValue(unsigned SoImm);
-
unsigned getAddrModeSBit(const MachineInstr &MI,
const TargetInstrDesc &TID) const;
@@ -140,8 +139,6 @@ namespace {
void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
- void emitMiscInstruction(const MachineInstr &MI);
-
void emitNEONLaneInstruction(const MachineInstr &MI);
void emitNEONDupInstruction(const MachineInstr &MI);
void emitNEON1RegModImmInstruction(const MachineInstr &MI);
@@ -150,20 +147,176 @@ namespace {
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
- unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
return getMachineOpValue(MI, MI.getOperand(OpIdx));
}
+ // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the
+ // TableGen'erated getBinaryCodeForInstr() function to encode any
+ // operand values, instead querying getMachineOpValue() directly for
+ // each operand it needs to encode. Thus, any of the new encoder
+ // helper functions can simply return 0, as the values they return
+ // are already handled elsewhere. They are placeholders to allow this
+ // encoder to continue to function until the MC encoder is sufficiently
+ // far along that this one can be eliminated entirely.
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+ const { return 0; }
+ unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+ const { return 0; }
+ unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op)
+ const { return 0; }
+ unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ unsigned getMsbOpValue(const MachineInstr &MI,
+ unsigned Op) const { return 0; }
+ uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+ uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0; }
+
+ unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+ const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+ uint32_t Binary;
+ Binary = Imm12 & 0xfff;
+ if (Imm12 >= 0)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
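A worked example of the packing above (illustrative, not LLVM code): for base register r3 (register number 3) and offset +20 the returned value is 0x7014.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Reg = 3, Imm12 = 20;               // base r3, offset +20
  uint32_t Binary = (Imm12 & 0xfff)           // {11-0}  offset
                  | (1u << 12)                // {12}    U bit: non-negative offset
                  | (Reg << 13);              // {17-13} base register
  assert(Binary == 0x7014);
  return 0;
}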
+
+ unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+ return 0;
+ }
+
+ uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+ const { return 0;}
+ uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ const MachineOperand &MO = MI.getOperand(Op);
+ const MachineOperand &MO1 = MI.getOperand(Op + 1);
+ if (!MO.isReg()) {
+ emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+ return 0;
+ }
+ unsigned Reg = getARMRegisterNumbering(MO.getReg());
+ int32_t Imm12 = MO1.getImm();
+
+ // Special value for #-0
+ if (Imm12 == INT32_MIN)
+ Imm12 = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs
+ // sub.
+ bool isAdd = true;
+ if (Imm12 < 0) {
+ Imm12 = -Imm12;
+ isAdd = false;
+ }
+
+ uint32_t Binary = Imm12 & 0xfff;
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+ }
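And the subtract case of the same folding (illustrative, not LLVM code): a negative value is encoded by magnitude with the U bit clear, e.g. base register r5 (register number 5) with -8 gives 0xA008.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Reg = 5;                           // base r5
  int32_t Imm = -8;
  bool isAdd = true;
  if (Imm < 0) { Imm = -Imm; isAdd = false; } // encode magnitude, clear U bit
  uint32_t Binary = (uint32_t(Imm) & 0xfff)
                  | (isAdd ? (1u << 12) : 0)
                  | (Reg << 13);
  assert(Binary == 0xA008);
  return 0;
}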
+ unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
+ unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
/// machine operand requires relocation, record the relocation and return
/// zero.
unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
unsigned Reloc);
- unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
- unsigned Reloc) {
- return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
- }
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
@@ -173,12 +326,12 @@ namespace {
/// fixed up by the relocation stage.
void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV = 0);
- void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
- void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
- void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
+ intptr_t ACPV = 0) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
- intptr_t JTBase = 0);
+ intptr_t JTBase = 0) const;
};
}
@@ -266,9 +419,9 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) {
+ const MachineOperand &MO) const {
if (MO.isReg())
- return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getARMRegisterNumbering(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
else if (MO.isGlobal())
@@ -285,12 +438,8 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
else if (MO.isMBB())
emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
- else {
-#ifndef NDEBUG
- errs() << MO;
-#endif
- llvm_unreachable(0);
- }
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
return 0;
}
@@ -298,7 +447,7 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
///
void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
bool MayNeedFarStub, bool Indirect,
- intptr_t ACPV) {
+ intptr_t ACPV) const {
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
const_cast<GlobalValue *>(GV),
@@ -312,7 +461,8 @@ void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
/// be emitted to the current location in the function, and allow it to be PC
/// relative.
-void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) {
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
Reloc, ES));
}
@@ -320,7 +470,7 @@ void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) {
/// emitConstPoolAddress - Arrange for the address of an constant pool
/// to be emitted to the current location in the function, and allow it to be PC
/// relative.
-void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) {
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
// Tell JIT emitter we'll resolve the address.
MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
Reloc, CPI, 0, true));
@@ -329,14 +479,16 @@ void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) {
/// emitJumpTableAddress - Arrange for the address of a jump table to
/// be emitted to the current location in the function, and allow it to be PC
/// relative.
-void ARMCodeEmitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) {
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
Reloc, JTIndex, 0, true));
}
/// emitMachineBasicBlock - Emit the specified address basic block.
void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc, intptr_t JTBase) {
+ unsigned Reloc,
+ intptr_t JTBase) const {
MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
Reloc, BB, JTBase));
}
@@ -364,6 +516,14 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
llvm_unreachable("Unhandled instruction encoding format!");
break;
}
+ case ARMII::MiscFrm:
+ if (MI.getOpcode() == ARM::LEApcrelJT) {
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ }
+ llvm_unreachable("Unhandled instruction encoding!");
+ break;
case ARMII::Pseudo:
emitPseudoInstruction(MI);
break;
@@ -418,9 +578,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::VFPLdStMulFrm:
emitVFPLoadStoreMultipleInstruction(MI);
break;
- case ARMII::VFPMiscFrm:
- emitMiscInstruction(MI);
- break;
+
// NEON instructions.
case ARMII::NGetLnFrm:
case ARMII::NSetLnFrm:
@@ -488,7 +646,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
emitWordLE(0);
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- uint32_t Val = *(uint32_t*)CI->getValue().getRawData();
+ uint32_t Val = uint32_t(*CI->getValue().getRawData());
emitWordLE(Val);
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
if (CFP->getType()->isFloatTy())
@@ -588,7 +746,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
// Emit the 'add' instruction.
- unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
@@ -600,7 +758,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
// Encode Rn which is PC.
- Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
// Encode the displacement.
Binary |= 1 << ARMII::I_BitShift;
@@ -628,7 +786,7 @@ void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
// Encode the shift operation.
switch (Opcode) {
default: break;
- case ARM::MOVrx:
+ case ARM::RRX:
// rrx
Binary |= 0x6 << 4;
break;
@@ -659,10 +817,10 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- case ARM::BX:
- case ARM::BMOVPCRX:
- case ARM::BXr9:
- case ARM::BMOVPCRXr9: {
+ case ARM::BX_CALL:
+ case ARM::BMOVPCRX_CALL:
+ case ARM::BXr9_CALL:
+ case ARM::BMOVPCRXr9_CALL: {
// First emit mov lr, pc
unsigned Binary = 0x01a0e00f;
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
@@ -720,18 +878,18 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
case ARM::MOVi32imm:
- emitMOVi32immInstruction(MI);
- break;
-
- case ARM::MOVi2pieces:
// Two instructions to materialize a constant.
- emitMOVi2piecesInstruction(MI);
+ if (Subtarget->hasV6T2Ops())
+ emitMOVi32immInstruction(MI);
+ else
+ emitMOVi2piecesInstruction(MI);
break;
+
case ARM::LEApcrelJT:
// Materialize jumptable address.
emitLEApcrelJTInstruction(MI);
break;
- case ARM::MOVrx:
+ case ARM::RRX:
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag:
emitPseudoMoveInstruction(MI);
@@ -789,8 +947,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
if (Rs) {
// Encode Rs bit[11:8].
assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
- return Binary |
- (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
}
// Encode shift_imm bit[11:7].
@@ -841,8 +998,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
else if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
- << ARMII::RegRdShift);
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
if (TID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
@@ -892,8 +1048,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
- << ARMII::RegRnShift);
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
else {
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
++OpIdx;
@@ -910,7 +1065,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
if (MO.isReg()) {
// Encode register Rm.
- emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg()));
+ emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
return;
}
@@ -930,6 +1085,13 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Part of binary is determined by TableGn.
unsigned Binary = getBinaryCodeForInstr(MI);
+ // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done.
+ if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+ MI.getOpcode() == ARM::STRi12) {
+ emitWordLE(Binary);
+ return;
+ }
+
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
@@ -946,16 +1108,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
if (ImplicitRd)
// Special handling for implicit use (e.g. PC).
- Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
- << ARMII::RegRdShift);
+ Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
- << ARMII::RegRnShift);
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -978,11 +1138,11 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
return;
}
- // Set bit I(25), because this is not in immediate enconding.
+ // Set bit I(25), because this is not in immediate encoding.
Binary |= 1 << ARMII::I_BitShift;
assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
// Set bit[3:0] to the corresponding Rm register
- Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+ Binary |= getARMRegisterNumbering(MO2.getReg());
// If this instr is in scaled register offset/index instruction, set
// shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
@@ -1026,8 +1186,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
- Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
- << ARMII::RegRnShift);
+ Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
else
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
@@ -1046,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// If this instr is in register offset/index encoding, set bit[3:0]
// to the corresponding Rm register.
if (MO2.getReg()) {
- Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+ Binary |= getARMRegisterNumbering(MO2.getReg());
emitWordLE(Binary);
return;
}
@@ -1100,8 +1259,8 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
// Set addressing mode by modifying bits U(23) and P(24)
- const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
// Set bit W(21)
if (IsUpdating)
@@ -1112,7 +1271,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
break;
- unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ unsigned RegNum = getARMRegisterNumbering(MO.getReg());
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
RegNum < 16);
Binary |= 0x1 << RegNum;
@@ -1349,7 +1508,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
// The return register is LR.
- Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
+ Binary |= getARMRegisterNumbering(ARM::LR);
else
// otherwise, set the return register
Binary |= getMachineOpValue(MI, 0);
@@ -1360,8 +1519,8 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = false;
- RegD = ARMRegisterInfo::getRegisterNumbering(RegD, &isSPVFP);
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+ RegD = getARMRegisterNumbering(RegD);
if (!isSPVFP)
Binary |= RegD << ARMII::RegRdShift;
else {
@@ -1374,8 +1533,8 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = false;
- RegN = ARMRegisterInfo::getRegisterNumbering(RegN, &isSPVFP);
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+ RegN = getARMRegisterNumbering(RegN);
if (!isSPVFP)
Binary |= RegN << ARMII::RegRnShift;
else {
@@ -1388,8 +1547,8 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- bool isSPVFP = false;
- RegM = ARMRegisterInfo::getRegisterNumbering(RegM, &isSPVFP);
+ bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+ RegM = getARMRegisterNumbering(RegM);
if (!isSPVFP)
Binary |= RegM;
else {
@@ -1548,8 +1707,8 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
// Set addressing mode by modifying bits U(23) and P(24)
- const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
// Set bit W(21)
if (IsUpdating)
@@ -1576,63 +1735,10 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
-void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
- unsigned Opcode = MI.getDesc().Opcode;
- // Part of binary is determined by TableGn.
- unsigned Binary = getBinaryCodeForInstr(MI);
-
- // Set the conditional execution predicate
- Binary |= II->getPredicate(&MI) << ARMII::CondShift;
-
- switch(Opcode) {
- default:
- llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
-
- case ARM::FMSTAT:
- // No further encoding needed.
- break;
-
- case ARM::VMRS:
- case ARM::VMSR: {
- const MachineOperand &MO0 = MI.getOperand(0);
- // Encode Rt.
- Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
- << ARMII::RegRdShift;
- break;
- }
-
- case ARM::FCONSTD:
- case ARM::FCONSTS: {
- // Encode Dd / Sd.
- Binary |= encodeVFPRd(MI, 0);
-
- // Encode imm., Table A7-18 VFP modified immediate constants
- const MachineOperand &MO1 = MI.getOperand(1);
- unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
- .bitcastToAPInt().getHiBits(32).getLimitedValue());
- unsigned ModifiedImm;
-
- if(Opcode == ARM::FCONSTS)
- ModifiedImm = (Imm & 0x80000000) >> 24 | // a
- (Imm & 0x03F80000) >> 19; // bcdefgh
- else // Opcode == ARM::FCONSTD
- ModifiedImm = (Imm & 0x80000000) >> 24 | // a
- (Imm & 0x007F0000) >> 16; // bcdefgh
-
- // Insts{19-16} = abcd, Insts{3-0} = efgh
- Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
- Binary |= (ModifiedImm & 0xF);
- break;
- }
- }
-
- emitWordLE(Binary);
-}
-
static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegD = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegD = ARMRegisterInfo::getRegisterNumbering(RegD);
+ RegD = getARMRegisterNumbering(RegD);
Binary |= (RegD & 0xf) << ARMII::RegRdShift;
Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
return Binary;
@@ -1641,7 +1747,7 @@ static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegN = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegN = ARMRegisterInfo::getRegisterNumbering(RegN);
+ RegN = getARMRegisterNumbering(RegN);
Binary |= (RegN & 0xf) << ARMII::RegRnShift;
Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
return Binary;
@@ -1650,7 +1756,7 @@ static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
unsigned RegM = MI.getOperand(OpIdx).getReg();
unsigned Binary = 0;
- RegM = ARMRegisterInfo::getRegisterNumbering(RegM);
+ RegM = getARMRegisterNumbering(RegM);
Binary |= (RegM & 0xf);
Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
return Binary;
@@ -1684,7 +1790,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
- RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ RegT = getARMRegisterNumbering(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, RegNOpIdx);
@@ -1713,7 +1819,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
unsigned RegT = MI.getOperand(1).getReg();
- RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ RegT = getARMRegisterNumbering(RegT);
Binary |= (RegT << ARMII::RegRdShift);
Binary |= encodeNEONRn(MI, 0);
emitWordLE(Binary);
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 60e923b..13d1b33 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1,4 +1,4 @@
-//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -316,7 +316,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
/// The next UID to take is the first unused one.
- AFI->initConstPoolEntryUId(CPEMIs.size());
+ AFI->initPICLabelUId(CPEMIs.size());
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
@@ -327,7 +327,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
/// Remove dead constant pool entries.
- RemoveUnusedCPEntries();
+ MadeChange |= RemoveUnusedCPEntries();
// Iteratively place constant pool entries and fix up branches until there
// is no change.
@@ -368,6 +368,14 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry & CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
DEBUG(errs() << '\n'; dumpBBs());
BBSizes.clear();
@@ -482,7 +490,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
HasInlineAsm = true;
}
- // Now go back through the instructions and build up our data structures
+ // Now go back through the instructions and build up our data structures.
unsigned Offset = 0;
for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
MBBI != E; ++MBBI) {
@@ -603,7 +611,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
Scale = 4;
break;
- case ARM::LDR:
+ case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
Bits = 12; // +-offset_12
@@ -611,7 +619,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
case ARM::tLDRpci:
- case ARM::tLDRcp:
Bits = 8;
Scale = 4; // +(offset_8*4)
break;
@@ -692,7 +699,7 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
- // Renumber the MBB's to keep them consequtive.
+ // Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
// Insert a size into BBSizes to align it properly with the (newly
@@ -1242,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
// No existing clone of this CPE is within range.
// We will be generating a new clone. Get a UID for it.
- unsigned ID = AFI->createConstPoolEntryUId();
+ unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
@@ -1644,7 +1651,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
unsigned DestOffset = BBOffsets[DestBB->getNumber()];
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
- if (CmpMI->getOpcode() == ARM::tCMPzi8) {
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
Pred = llvm::getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
@@ -1766,7 +1773,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
- unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index f13ccc6..165a1d8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
ARMCP::ARMCPKind K,
unsigned char PCAdj,
- const char *Modif,
+ ARMCP::ARMCPModifier Modif,
bool AddCA)
: MachineConstantPoolValue((const Type*)cval->getType()),
CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj),
@@ -33,17 +33,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id,
ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C,
const char *s, unsigned id,
unsigned char PCAdj,
- const char *Modif,
+ ARMCP::ARMCPModifier Modif,
bool AddCA)
: MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)),
CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol),
PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {}
ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv,
- const char *Modif)
+ ARMCP::ARMCPModifier Modif)
: MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())),
CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0),
- Modifier(Modif) {}
+ Modifier(Modif), AddCurrentAddress(false) {}
const GlobalValue *ARMConstantPoolValue::getGV() const {
return dyn_cast_or_null<GlobalValue>(CVal);
@@ -53,6 +53,14 @@ const BlockAddress *ARMConstantPoolValue::getBlockAddress() const {
return dyn_cast_or_null<BlockAddress>(CVal);
}
+static bool CPV_streq(const char *S1, const char *S2) {
+ if (S1 == S2)
+ return true;
+ if (S1 && S2 && strcmp(S1, S2) == 0)
+ return true;
+ return false;
+}
+
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
@@ -65,8 +73,8 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
if (CPV->CVal == CVal &&
CPV->LabelId == LabelId &&
CPV->PCAdjust == PCAdjust &&
- (CPV->S == S || strcmp(CPV->S, S) == 0) &&
- (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0))
+ CPV_streq(CPV->S, S) &&
+ CPV->Modifier == Modifier)
return i;
}
}
@@ -91,8 +99,8 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
if (ACPV->Kind == Kind &&
ACPV->CVal == CVal &&
ACPV->PCAdjust == PCAdjust &&
- (ACPV->S == S || strcmp(ACPV->S, S) == 0) &&
- (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) {
+ CPV_streq(ACPV->S, S) &&
+ ACPV->Modifier == Modifier) {
if (ACPV->LabelId == LabelId)
return true;
// Two PC relative constpool entries containing the same GV address or
@@ -113,7 +121,7 @@ void ARMConstantPoolValue::print(raw_ostream &O) const {
O << CVal->getName();
else
O << S;
- if (Modifier) O << "(" << Modifier << ")";
+ if (Modifier) O << "(" << getModifierText() << ")";
if (PCAdjust != 0) {
O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
if (AddCurrentAddress) O << "-.";
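[Editor's note: the CPV_streq helper added in this hunk replaces the ad-hoc `S == S || strcmp(...)` checks so that comparing two possibly-NULL symbol strings never dereferences NULL. A minimal standalone sketch of the same NULL-safe comparison, outside the patch and with an illustrative driver, assuming only the C standard library:]

#include <cassert>
#include <cstring>

// NULL-safe C-string equality: equal if both pointers are identical
// (including both NULL), or both non-NULL and byte-wise equal.
static bool StrEq(const char *S1, const char *S2) {
  if (S1 == S2)
    return true;
  return S1 && S2 && std::strcmp(S1, S2) == 0;
}

int main() {
  assert(StrEq(nullptr, nullptr));   // two NULL symbols compare equal
  assert(StrEq("tlsgd", "tlsgd"));   // identical contents compare equal
  assert(!StrEq(nullptr, "tlsgd"));  // NULL vs. non-NULL differ
  return 0;
}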
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 3119b54..d008811 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
namespace llvm {
@@ -31,6 +32,15 @@ namespace ARMCP {
CPBlockAddress,
CPLSDA
};
+
+ enum ARMCPModifier {
+ no_modifier,
+ TLSGD,
+ GOT,
+ GOTOFF,
+ GOTTPOFF,
+ TPOFF
+ };
}
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
@@ -43,26 +53,41 @@ class ARMConstantPoolValue : public MachineConstantPoolValue {
ARMCP::ARMCPKind Kind; // Kind of constant.
unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative.
// 8 for ARM, 4 for Thumb.
- const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
bool AddCurrentAddress;
public:
ARMConstantPoolValue(const Constant *cval, unsigned id,
ARMCP::ARMCPKind Kind = ARMCP::CPValue,
- unsigned char PCAdj = 0, const char *Modifier = NULL,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
bool AddCurrentAddress = false);
ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id,
- unsigned char PCAdj = 0, const char *Modifier = NULL,
+ unsigned char PCAdj = 0,
+ ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier,
bool AddCurrentAddress = false);
- ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier);
+ ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier);
ARMConstantPoolValue();
~ARMConstantPoolValue();
const GlobalValue *getGV() const;
const char *getSymbol() const { return S; }
const BlockAddress *getBlockAddress() const;
- const char *getModifier() const { return Modifier; }
- bool hasModifier() const { return Modifier != NULL; }
+ ARMCP::ARMCPModifier getModifier() const { return Modifier; }
+ const char *getModifierText() const {
+ switch (Modifier) {
+ default: llvm_unreachable("Unknown modifier!");
+ // FIXME: Are these case sensitive? It'd be nice to lower-case all the
+ // strings if that's legal.
+ case ARMCP::no_modifier: return "none";
+ case ARMCP::TLSGD: return "tlsgd";
+ case ARMCP::GOT: return "GOT";
+ case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOTTPOFF: return "gottpoff";
+ case ARMCP::TPOFF: return "tpoff";
+ }
+ }
+ bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
unsigned getLabelId() const { return LabelId; }
unsigned char getPCAdjustment() const { return PCAdjust; }
@@ -71,11 +96,7 @@ public:
bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; }
bool isLSDA() { return Kind == ARMCP::CPLSDA; }
- virtual unsigned getRelocationInfo() const {
- // FIXME: This is conservatively claiming that these entries require a
- // relocation, we may be able to do better than this.
- return 2;
- }
+ virtual unsigned getRelocationInfo() const { return 2; }
virtual int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment);
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
new file mode 100644
index 0000000..51e68b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -0,0 +1,83 @@
+//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMELFWriterInfo.h"
+#include "ARMRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the ARMELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+ARMELFWriterInfo::~ARMELFWriterInfo() {}
+
+unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case ARM::reloc_arm_absolute:
+ case ARM::reloc_arm_relative:
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_jt_base:
+ case ARM::reloc_arm_pic_jt:
+ assert(0 && "unsupported ARM relocation type"); break;
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ default:
+ llvm_unreachable("unknown ARM relocation type"); break;
+ }
+ return 0;
+}
+
+long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
+ return 0;
+}
+
+unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
+ return 0;
+}
+
+bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
+  return true;
+}
+
+unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ assert(0 &&
+ "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
+ return 0;
+}
+
+long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+ assert(0 &&
+         "ARMELFWriterInfo::computeRelocation() not implemented");
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
new file mode 100644
index 0000000..1c4e532
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_ELF_WRITER_INFO_H
+#define ARM_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class ARMELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ ARMELFWriterInfo(TargetMachine &TM);
+ virtual ~ARMELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // ARM_ELF_WRITER_INFO_H
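[Editor's note: as a rough illustration of the mapping implemented by getRelocationType in the new ARMELFWriterInfo.cpp above, the sketch below translates the three supported machine relocation kinds into ELF relocation codes. The enum values are stand-ins for ARM::reloc_* and ELF::R_ARM_* and are not taken from the patch; only the branch/movw/movt cases mirror the real switch.]

#include <cstdio>
#include <stdexcept>

// Illustrative stand-ins for ARM::reloc_* (real values live in ARMRelocations.h).
enum MachineRelTy { reloc_arm_branch, reloc_arm_movt, reloc_arm_movw, reloc_arm_pic_jt };
// Stand-ins for ELF::R_ARM_*; numbers follow the ARM ELF ABI relocation codes.
enum ELFRelTy { R_ARM_CALL = 28, R_ARM_MOVW_ABS_NC = 43, R_ARM_MOVT_ABS = 44 };

// Mirrors the shape of ARMELFWriterInfo::getRelocationType: only branch,
// movw and movt relocations are translated; everything else is unsupported.
static unsigned getELFRelocationType(MachineRelTy RelTy) {
  switch (RelTy) {
  case reloc_arm_branch: return R_ARM_CALL;
  case reloc_arm_movt:   return R_ARM_MOVT_ABS;
  case reloc_arm_movw:   return R_ARM_MOVW_ABS_NC;
  default: throw std::runtime_error("unsupported ARM relocation type");
  }
}

int main() {
  std::printf("branch -> %u\n", getELFRelocationType(reloc_arm_branch));
  return 0;
}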
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc2e3c3..bd753d2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -7,36 +7,38 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains a pass that expand pseudo instructions into target
+// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, and other late
// optimizations. This pass should be run after register allocation but before
-// post- regalloc scheduling pass.
+// the post-regalloc scheduling pass.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
+#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
- // Constants for register spacing in NEON load/store instructions.
- enum NEONRegSpacing {
- SingleSpc,
- EvenDblSpc,
- OddDblSpc
- };
-
public:
static char ID;
ARMExpandPseudo() : MachineFunctionPass(ID) {}
- const TargetInstrInfo *TII;
+ const ARMBaseInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -47,11 +49,16 @@ namespace {
private:
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
+ bool ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
- void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
- bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
- void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
- bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
+ void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs);
+ void ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
@@ -67,44 +74,349 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ UseMI.addOperand(MO);
else
- DefMI.addReg(MO.getReg(),
- getDefRegState(true) | getDeadRegState(MO.isDead()));
+ DefMI.addOperand(MO);
+ }
+}
+
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+  // For quad-register load-lane and store-lane pseudo instructions, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
+{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
+{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
+{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
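[Editor's note: LookupNEONLdSt above depends on NEONLdStTable staying sorted by PseudoOpc so std::lower_bound can binary-search it. A minimal sketch of the same lookup pattern, with hypothetical opcodes and entries that are not part of the patch:]

#include <algorithm>
#include <cassert>

struct Entry {
  unsigned PseudoOpc;  // search key; the table must stay sorted on this field
  unsigned RealOpc;
};

// Heterogeneous comparison so lower_bound can compare an Entry against a bare
// opcode, as the patch does with NEONLdStTableEntry.
static bool operator<(const Entry &E, unsigned Opc) { return E.PseudoOpc < Opc; }

// Hypothetical, already-sorted table standing in for NEONLdStTable.
static const Entry Table[] = { {10, 100}, {20, 200}, {30, 300} };

static const Entry *Lookup(unsigned Opc) {
  const Entry *End = Table + sizeof(Table) / sizeof(Table[0]);
  const Entry *I = std::lower_bound(Table, End, Opc);
  return (I != End && I->PseudoOpc == Opc) ? I : nullptr;  // exact match only
}

int main() {
  assert(Lookup(20) && Lookup(20)->RealOpc == 200);
  assert(Lookup(25) == nullptr);  // opcodes not in the table return no entry
  return 0;
}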
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
}
}
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
-void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool hasWriteBack,
- NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
- if (RegSpc == SingleSpc) {
- D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
- } else if (RegSpc == EvenDblSpc) {
- D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
- } else {
- assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
- D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
- }
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2)
@@ -112,107 +424,373 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
if (NumRegs > 3)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
- if (hasWriteBack) {
- bool WBIsDead = MI.getOperand(OpIdx).isDead();
- unsigned WBReg = MI.getOperand(OpIdx++).getReg();
- MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
- }
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
// Copy the addrmode6 operands.
- bool AddrIsKill = MI.getOperand(OpIdx).isKill();
- MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
- MIB.addImm(MI.getOperand(OpIdx++).getImm());
- if (hasWriteBack) {
- // Copy the am6offset operand.
- bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
- MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
- }
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
- MIB = AddDefaultPred(MIB);
- TransferImpOps(MI, MIB, MIB);
- // For an instruction writing the odd subregs, add an implicit use of the
- // super-register because the even subregs were loaded separately.
- if (RegSpc == OddDblSpc)
- MIB.addReg(DstReg, RegState::Implicit);
+ // For an instruction writing double-spaced subregs, the pseudo instruction
+ // has an extra operand that is a use of the super-register. Record the
+ // operand index and skip over it.
+ unsigned SrcOpIdx = 0;
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
+ SrcOpIdx = OpIdx++;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source operand used for double-spaced subregs over
+ // to the new instruction as an implicit operand.
+ if (SrcOpIdx != 0) {
+ MachineOperand MO = MI.getOperand(SrcOpIdx);
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ }
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
}
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
-void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool hasWriteBack,
- NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
- if (hasWriteBack) {
- bool DstIsDead = MI.getOperand(OpIdx).isDead();
- unsigned DstReg = MI.getOperand(OpIdx++).getReg();
- MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
- }
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
// Copy the addrmode6 operands.
- bool AddrIsKill = MI.getOperand(OpIdx).isKill();
- MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
- MIB.addImm(MI.getOperand(OpIdx++).getImm());
- if (hasWriteBack) {
- // Copy the am6offset operand.
- bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
- MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
- }
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
- unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
- if (RegSpc == SingleSpc) {
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
- } else if (RegSpc == EvenDblSpc) {
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
- } else {
- assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
- }
-
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0).addReg(D1);
if (NumRegs > 2)
MIB.addReg(D2);
if (NumRegs > 3)
MIB.addReg(D3);
- MIB = AddDefaultPred(MIB);
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
+ unsigned DstReg = 0;
+ bool DstIsDead = false;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags);
+ if (NumRegs > 1)
+ MIB.addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+ OpIdx += 1;
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
+/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool IsExt, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ // Transfer the destination register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ if (IsExt)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+
+ // Copy the other source register operand.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
if (SrcIsKill)
// Add an implicit kill for the super-reg.
(*MIB).addRegisterKilled(SrcReg, TRI, true);
+ TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
}
-bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
- bool Modified = false;
+void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
+ const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ MachineInstrBuilder LO16, HI16;
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
- MachineInstr &MI = *MBBI;
- MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ if (!STI->hasV6T2Ops() &&
+ (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // Expand into a movi + orr.
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
+ unsigned ImmVal = (unsigned)MO.getImm();
+ unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
+ unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
+ LO16 = LO16.addImm(SOImmValV1);
+ HI16 = HI16.addImm(SOImmValV2);
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg).addReg(0);
+ HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ return;
+ }
+
+ unsigned LO16Opc = 0;
+ unsigned HI16Opc = 0;
+ if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
+ LO16Opc = ARM::t2MOVi16;
+ HI16Opc = ARM::t2MOVTi16;
+ } else {
+ LO16Opc = ARM::MOVi16;
+ HI16Opc = ARM::MOVTi16;
+ }
- bool ModifiedOp = true;
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ }
+
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+}
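[Editor's note: on v6T2 and later, ExpandMOV32BitImm above splits a 32-bit immediate into the low and high half-words consumed by the movw/movt pair (pre-v6T2 ARM instead uses two shifter-operand immediates via mov + orr). A tiny sketch of the half-word split used in the immediate case, outside the patch:]

#include <cassert>
#include <cstdint>

// Split a 32-bit immediate the way the movw/movt expansion does:
// movw receives the low 16 bits, movt the high 16 bits.
static void SplitImm32(uint32_t Imm, uint32_t &Lo16, uint32_t &Hi16) {
  Lo16 = Imm & 0xffff;
  Hi16 = (Imm >> 16) & 0xffff;
}

int main() {
  uint32_t Lo, Hi;
  SplitImm32(0x12345678u, Lo, Hi);
  assert(Lo == 0x5678 && Hi == 0x1234);
  // Reassembling the halves recovers the original constant.
  assert(((Hi << 16) | Lo) == 0x12345678u);
  return 0;
}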
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
default:
- ModifiedOp = false;
- break;
+ return false;
+ case ARM::Int_eh_sjlj_dispatchsetup: {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMBaseInstrInfo *AII =
+ static_cast<const ARMBaseInstrInfo*>(TII);
+ const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+ // For functions using a base pointer, we rematerialize it (via the frame
+ // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+ // for us. Otherwise, expand to nothing.
+ if (RI.hasBasePointer(MF)) {
+ int32_t NumBytes = AFI->getFramePtrSpillOffset();
+ unsigned FramePtr = RI.getFrameRegister(MF);
+ assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "base pointer without frame pointer?");
+
+ if (AFI->isThumb2Function()) {
+ llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ } else if (AFI->isThumbFunction()) {
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6,
+ FramePtr, -NumBytes,
+ *TII, RI, MI.getDebugLoc());
+ } else {
+ llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
+ }
+ // If there's dynamic realignment, adjust for it.
+ if (RI.needsStackRealignment(MF)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ // Emit bic r6, r6, MaxAlign
+ unsigned bicOpc = AFI->isThumbFunction() ?
+ ARM::t2BICri : ARM::BICri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ }
+
+ }
+ MI.eraseFromParent();
+ return true;
+ }
- case ARM::tLDRpci_pic:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag: {
+    // These are just fancy MOVs instructions.
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addReg(0)
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
+ : ARM_AM::asr), 1)))
+ .addReg(ARM::CPSR, RegState::Define);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::RRX: {
+    // This encodes as "MOVs Rd, Rm, rrx".
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+ .addReg(0);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::TPsoft: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
+
+ (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
? ARM::tLDRpci : ARM::t2LDRpci;
@@ -225,54 +803,73 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
(*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::tPICADD))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- break;
+ return true;
}
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm: {
- unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ case ARM::MOV_ga_dyn:
+ case ARM::MOV_ga_pcrel:
+ case ARM::MOV_ga_pcrel_ldr:
+ case ARM::t2MOV_ga_dyn:
+ case ARM::t2MOV_ga_pcrel: {
+    // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
+ unsigned LabelId = AFI->createPICLabelUId();
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- const MachineOperand &MO = MI.getOperand(1);
- MachineInstrBuilder LO16, HI16;
-
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Opcode == ARM::MOVi32imm ?
- ARM::MOVi16 : ARM::t2MOVi16),
- DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Opcode == ARM::MOVi32imm ?
- ARM::MOVTi16 : ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(DstReg);
-
- if (MO.isImm()) {
- unsigned Imm = MO.getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- LO16 = LO16.addImm(Lo16);
- HI16 = HI16.addImm(Hi16);
- } else {
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ const GlobalValue *GV = MO1.getGlobal();
+ unsigned TF = MO1.getTargetFlags();
+ bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+ bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+ unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+ unsigned LO16TF = isPIC
+ ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+ unsigned HI16TF = isPIC
+ ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+ unsigned PICAddOpc = isARM
+ ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+ : ARM::tPICADD;
+ MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId);
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(HI16Opc), DstReg)
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId);
+ if (!isPIC) {
+ TransferImpOps(MI, MIB1, MIB2);
+ MI.eraseFromParent();
+ return true;
}
- (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- LO16.addImm(Pred).addReg(PredReg);
- HI16.addImm(Pred).addReg(PredReg);
- TransferImpOps(MI, LO16, HI16);
+
+ MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(PICAddOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg).addImm(LabelId);
+ if (isARM) {
+ AddDefaultPred(MIB3);
+ if (Opcode == ARM::MOV_ga_pcrel_ldr)
+ (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ }
+ TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
- break;
+ return true;
}
+ case ARM::MOVi32imm:
+ case ARM::MOVCCi32imm:
+ case ARM::t2MOVi32imm:
+ case ARM::t2MOVCCi32imm:
+ ExpandMOV32BitImm(MBB, MBBI);
+ return true;
+
case ARM::VMOVQQ: {
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -285,222 +882,339 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(EvenDst,
- getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ .addReg(EvenDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(OddDst,
- getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ .addReg(OddDst,
+ RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VLDMQIA:
+ case ARM::VLDMQDB: {
+ unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register destination.
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the source register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the destination operands (D subregs).
+ unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ case ARM::VSTMQIA:
+ case ARM::VSTMQDB: {
+ unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+
+ // Grab the Q register source.
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
+
+ // Copy the destination register.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Add the source operands (D subregs).
+ unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ MIB.addReg(D0).addReg(D1);
+
+ if (SrcIsKill)
+ // Add an implicit kill for the Q register.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::VDUPfqf:
+ case ARM::VDUPfdf:{
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+ unsigned OpIdx = 0;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
+ unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+ // The lane is [0,1] for the containing DReg superregister.
+ // Copy the dst/src register operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addReg(DReg);
+ ++OpIdx;
+ // Add the lane select operand.
+ MIB.addImm(Lane);
+ // Add the predicate operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
}
case ARM::VLD1q8Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
case ARM::VLD1q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
-
case ARM::VLD2d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
case ARM::VLD2d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
-
case ARM::VLD3d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo:
- ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
case ARM::VLD3d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VLD3q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
case ARM::VLD3q8oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q16oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q32oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
-
case ARM::VLD4d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo:
- ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
case ARM::VLD4d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VLD4q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
case ARM::VLD4q8oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q16oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q32oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ ExpandVLD(MBBI);
+ return true;
case ARM::VST1q8Pseudo:
- ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
case ARM::VST1q16Pseudo:
- ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
case ARM::VST1q32Pseudo:
- ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
case ARM::VST1q64Pseudo:
- ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
case ARM::VST1q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
case ARM::VST1q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
case ARM::VST1q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
case ARM::VST1q64Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
-
case ARM::VST2d8Pseudo:
- ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
case ARM::VST2d16Pseudo:
- ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
case ARM::VST2d32Pseudo:
- ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
case ARM::VST2q8Pseudo:
- ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
case ARM::VST2q16Pseudo:
- ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
case ARM::VST2q32Pseudo:
- ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
case ARM::VST2d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
case ARM::VST2d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
case ARM::VST2d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
case ARM::VST2q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
case ARM::VST2q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
case ARM::VST2q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
-
case ARM::VST3d8Pseudo:
- ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
case ARM::VST3d16Pseudo:
- ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
case ARM::VST3d32Pseudo:
- ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
case ARM::VST1d64TPseudo:
- ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
case ARM::VST3d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
case ARM::VST3d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
case ARM::VST3d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
case ARM::VST1d64TPseudo_UPD:
- ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VST3q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q8oddPseudo:
+ case ARM::VST3q16oddPseudo:
+ case ARM::VST3q32oddPseudo:
case ARM::VST3q8oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q16oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q32oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
-
case ARM::VST4d8Pseudo:
- ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
case ARM::VST4d16Pseudo:
- ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
case ARM::VST4d32Pseudo:
- ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
case ARM::VST1d64QPseudo:
- ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
case ARM::VST4d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
case ARM::VST4d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
case ARM::VST4d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
case ARM::VST1d64QPseudo_UPD:
- ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VST4q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q8oddPseudo:
+ case ARM::VST4q16oddPseudo:
+ case ARM::VST4q32oddPseudo:
case ARM::VST4q8oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q16oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q32oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
- }
+ ExpandVST(MBBI);
+ return true;
+
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST1LNq8Pseudo:
+ case ARM::VST1LNq16Pseudo:
+ case ARM::VST1LNq32Pseudo:
+ case ARM::VST1LNq8Pseudo_UPD:
+ case ARM::VST1LNq16Pseudo_UPD:
+ case ARM::VST1LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ return true;
+
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ }
+
+ return false;
+}
- if (ModifiedOp)
- Modified = true;
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ Modified |= ExpandMI(MBB, MBBI);
MBBI = NMBBI;
}
@@ -508,8 +1222,11 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
+ const TargetMachine &TM = MF.getTarget();
+ TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
+ AFI = MF.getInfo<ARMFunctionInfo>();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
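Editorial note between the two files: the VLDMQIA/VSTMQIA cases above rewrite a quad (Q) register memory pseudo into the double (D) register instruction by naming both D halves explicitly and keeping the original Q register as an implicit operand. Below is a minimal stand-alone sketch of that register split, not the LLVM MachineInstrBuilder API; the RegOperand struct and helper name are illustrative only. It relies on the ARM fact that Qn aliases the pair D(2n) and D(2n+1).

    #include <cstdio>
    #include <vector>

    // Toy model of the Q -> D operand expansion performed for VLDMQIA/VSTMQIA.
    struct RegOperand {
        char bank;        // 'q' or 'd'
        int num;          // register number within the bank
        bool isImplicit;  // models the implicit super-register operand
    };

    static std::vector<RegOperand> expandQLoadOperands(int qreg) {
        int d0 = 2 * qreg;      // dsub_0
        int d1 = 2 * qreg + 1;  // dsub_1
        return {
            {'d', d0,   /*isImplicit=*/false},
            {'d', d1,   /*isImplicit=*/false},
            {'q', qreg, /*isImplicit=*/true},  // implicit def of the Q register
        };
    }

    int main() {
        for (const RegOperand &Op : expandQLoadOperands(5))
            std::printf("def %c%d%s\n", Op.bank, Op.num,
                        Op.isImplicit ? " (implicit)" : "");
    }

Printed for Q5 this lists D10, D11 and the implicit Q5 def, which is the same operand shape the expanded VLDMDIA carries in the patch.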
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 4892eae..9f29530 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -15,14 +15,17 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -30,7 +33,9 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -43,12 +48,37 @@
using namespace llvm;
static cl::opt<bool>
-EnableARMFastISel("arm-fast-isel",
- cl::desc("Turn on experimental ARM fast-isel support"),
- cl::init(false), cl::Hidden);
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
namespace {
+ // All possible address modes, plus some.
+ typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int Offset;
+ unsigned Scale;
+ unsigned PlusReg;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
+ Base.Reg = 0;
+ }
+ } Address;
+
class ARMFastISel : public FastISel {
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
@@ -57,13 +87,14 @@ class ARMFastISel : public FastISel {
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const ARMFunctionInfo *AFI;
+ ARMFunctionInfo *AFI;
- // Convenience variable to avoid checking all the time.
+ // Convenience variables to avoid some queries.
bool isThumb;
+ LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
: FastISel(funcInfo),
TM(funcInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
@@ -71,6 +102,7 @@ class ARMFastISel : public FastISel {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
}
// Code from FastISel.cpp.
@@ -102,36 +134,73 @@ class ARMFastISel : public FastISel {
virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx);
-
+
// Backend specific FastISel code.
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
#include "ARMGenFastISel.inc"
-
+
// Instruction selection routines.
- virtual bool ARMSelectLoad(const Instruction *I);
- virtual bool ARMSelectStore(const Instruction *I);
- virtual bool ARMSelectBranch(const Instruction *I);
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
// Utility routines.
private:
- bool isTypeLegal(const Type *Ty, EVT &VT);
- bool isLoadTypeLegal(const Type *Ty, EVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
- bool ARMLoadAlloca(const Instruction *I);
- bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
- bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
- bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
-
+ bool isTypeLegal(const Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMComputeAddress(const Value *Obj, Address &Addr);
+ void ARMSimplifyAddress(Address &Addr, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+ void AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB);
};
} // end anonymous namespace
-// #include "ARMGenCallingConv.inc"
+#include "ARMGenCallingConv.inc"
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
@@ -153,6 +222,9 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
MachineInstr *MI = &*MIB;
@@ -160,7 +232,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Do we use a predicate?
if (TII.isPredicable(MI))
AddDefaultPred(MIB);
-
+
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
// defines CPSR. All other OptionalDefines in ARM are the CCR register.
bool CPSR = false;
@@ -297,7 +369,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-
+
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm));
@@ -323,16 +395,84 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
return ResultReg;
}
-unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT = TLI.getValueType(C->getType(), true);
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::f64) return 0;
- // Only handle simple types.
- if (!VT.isSimple()) return 0;
-
- // TODO: This should be safe for fp because they're just bits from the
- // Constant.
- // TODO: Theoretically we could materialize fp constants with instructions
- // from VFP3.
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// For double width floating point we need to materialize two constants
+// (the high and the low) into integer registers then use a move to get
+// the combined constant into an FP reg.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addFPImm(CFP));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT != MVT::i32) return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // If we can do this in a single instruction without a constant pool entry
+ // do so now.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+ unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), DestReg)
+ .addImm(CI->getSExtValue()));
+ return DestReg;
+ }
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
@@ -342,58 +482,144 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
- // Different addressing modes between ARM/Thumb2 for constant pool loads.
if (isThumb)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::t2LDRpci))
- .addReg(DestReg).addConstantPoolIndex(Idx));
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
else
+ // The extra immediate is for addrmode2.
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::LDRcp))
- .addReg(DestReg).addConstantPoolIndex(Idx)
- .addReg(0).addImm(0));
-
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
return DestReg;
}
-bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
- VT = TLI.getValueType(Ty, true);
-
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: No external globals for now.
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+ ARMCP::CPValue, PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
// Only handle simple types.
- if (VT == MVT::Other || !VT.isSimple()) return false;
-
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple()) return false;
+ VT = evt.getSimpleVT();
+
// Handle all legal types, i.e. a register that will directly hold this
// value.
return TLI.isTypeLegal(VT);
}
-bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT)) return true;
-
+
// If this is a type than can be sign or zero-extended to a basic operation
// go ahead and accept it now.
if (VT == MVT::i8 || VT == MVT::i16)
return true;
-
+
return false;
}
-// Computes the Reg+Offset to get to an object.
-bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
- int &Offset) {
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
// Some boilerplate from the X86 FastISel.
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
- // Don't walk into other basic blocks; it's possible we haven't
- // visited them yet, so the instructions may not yet be assigned
- // virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
-
- Opcode = I->getOpcode();
- U = I;
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
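Editorial note: the materialization helpers above (ARMMaterializeInt in particular) prefer a single move-immediate when the target has v6T2 and the constant fits in 16 unsigned bits, and otherwise fall back to a constant-pool load whose explicit alignment is the preferred type alignment, or the allocation size when that is zero. A hedged stand-alone sketch of that decision follows; the enum and function names are illustrative, not part of the backend.

    #include <cstdint>
    #include <cstdio>

    // Toy model of the move-immediate vs. constant-pool choice shown above.
    enum class MatKind { MoveImm16, ConstantPool };

    static MatKind chooseIntMaterialization(int64_t value, bool hasV6T2) {
        bool fitsU16 = value >= 0 && value <= 0xFFFF;  // mirrors isUInt<16>
        return (hasV6T2 && fitsU16) ? MatKind::MoveImm16 : MatKind::ConstantPool;
    }

    // Constant-pool entries need an explicit alignment: the preferred type
    // alignment when known, otherwise the type's allocation size.
    static unsigned constantPoolAlign(unsigned prefTypeAlign, unsigned allocSize) {
        return prefTypeAlign != 0 ? prefTypeAlign : allocSize;
    }

    int main() {
        std::printf("%d\n", chooseIntMaterialization(1234, true) == MatKind::MoveImm16);
        std::printf("%d\n", chooseIntMaterialization(1 << 20, true) == MatKind::ConstantPool);
        std::printf("align=%u\n", constantPoolAlign(0, 4));
    }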
@@ -404,141 +630,282 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
// Fast instruction selection doesn't support the special
// address spaces.
return false;
-
+
switch (Opcode) {
- default:
- //errs() << "Failing Opcode is: " << *Op1 << "\n";
+ default:
break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeAddress(U->getOperand(0), Addr);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ int TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else
+ goto unsupported_gep;
+ } while (!Worklist.empty());
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
case Instruction::Alloca: {
- assert(false && "Alloca should have been handled earlier!");
- return false;
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
}
}
-
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
- //errs() << "Failing GV is: " << GV << "\n";
- (void)GV;
- return false;
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Addr.Base.Reg = Tmp;
+ return true;
}
-
+
// Try to get this in a register if nothing else has worked.
- Reg = getRegForValue(Obj);
- if (Reg == 0) return false;
+ if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
- // Since the offset may be too large for the load instruction
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+ break;
+ }
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(Addr.Base.FI)
+ .addImm(0));
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ // Since the offset is too large for the load/store instruction
// get the reg+offset into a register.
- // TODO: Verify the additions work, otherwise we'll need to add the
- // offset instead of 0 to the instructions and do all sorts of operand
- // munging.
- // TODO: Optimize this somewhat.
- if (Offset != 0) {
+ if (needsLowering) {
ARMCC::CondCodes Pred = ARMCC::AL;
unsigned PredReg = 0;
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned BaseReg = createResultReg(RC);
+
if (!isThumb)
emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- Reg, Reg, Offset, Pred, PredReg,
+ BaseReg, Addr.Base.Reg, Addr.Offset,
+ Pred, PredReg,
static_cast<const ARMBaseInstrInfo&>(TII));
else {
assert(AFI->isThumb2Function());
emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- Reg, Reg, Offset, Pred, PredReg,
+ BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
static_cast<const ARMBaseInstrInfo&>(TII));
}
+ Addr.Offset = 0;
+ Addr.Base.Reg = BaseReg;
}
-
- return true;
}
-bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
- Value *Op0 = I->getOperand(0);
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+ const MachineInstrBuilder &MIB) {
+ // addrmode5 output depends on the selection dag addressing dividing the
+ // offset by 4 that it then later multiplies. Do this here as well.
+ if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ Addr.Offset /= 4;
+
+ // Frame base works a bit differently. Handle it separately.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ int FI = Addr.Base.FI;
+ int Offset = Addr.Offset;
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ // Now add the rest of the operands.
+ MIB.addFrameIndex(FI);
- // Verify it's an alloca.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
-
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
- unsigned ResultReg = createResultReg(RC);
- TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
- ResultReg, SI->second, RC,
- TM.getRegisterInfo());
- UpdateValueMap(I, ResultReg);
- return true;
- }
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
+ MIB.addMemOperand(MMO);
+ } else {
+ // Now add the rest of the operands.
+ MIB.addReg(Addr.Base.Reg);
+
+ // ARM halfword load/stores need an additional operand.
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+ MIB.addImm(Addr.Offset);
}
- return false;
+ AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
- unsigned Reg, int Offset) {
-
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
-
+ TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Trying to emit for an unhandled type!");
- return false;
+ // This is mostly going to be Neon/vector support.
+ default: return false;
case MVT::i16:
- Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
- VT = MVT::i32;
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
break;
case MVT::i8:
- Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
- VT = MVT::i32;
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ RC = ARM::GPRRegisterClass;
break;
case MVT::i32:
- Opc = isThumb ? ARM::tLDR : ARM::LDR;
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
break;
}
-
- ResultReg = createResultReg(TLI.getRegClassFor(VT));
-
- // TODO: Fix the Addressing modes so that these can share some code.
- // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
- if (isThumb)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg)
- .addReg(Reg).addImm(Offset).addReg(0));
- else
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg)
- .addReg(Reg).addReg(0).addImm(Offset));
-
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg);
+ AddLoadStoreOperands(VT, Addr, MIB);
return true;
}
-bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
- Value *Op1 = I->getOperand(1);
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
- // Verify it's an alloca.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
- assert(SrcReg != 0 && "Nothing to store!");
- TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
- SrcReg, true /*isKill*/, SI->second, RC,
- TM.getRegisterInfo());
- return true;
- }
- }
- return false;
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
- unsigned DstReg, int Offset) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
unsigned StrOpc;
switch (VT.getSimpleVT().SimpleTy) {
+ // This is mostly going to be Neon/vector support.
default: return false;
- case MVT::i1:
- case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
- case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
- case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+ case MVT::i1: {
+ unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass);
+ unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), Res)
+ .addReg(SrcReg).addImm(1));
+ SrcReg = Res;
+ } // Fallthrough here.
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
StrOpc = ARM::VSTRS;
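Editorial note: the hunk above encodes two addressing rules in ARMSimplifyAddress and AddLoadStoreOperands: integer loads/stores accept a 12-bit offset, VFP loads/stores accept an 8-bit offset that addrmode5 stores in words (divided by 4), and anything out of range is folded into the base register first. A small stand-alone sketch of that range check; the function names here are illustrative stand-ins.

    #include <cassert>
    #include <cstdio>

    // Toy model of the offset checks in ARMSimplifyAddress/AddLoadStoreOperands.
    // True when the offset does not fit the addressing mode and must be folded
    // into the base register with an add before the memory instruction.
    static bool needsLowering(int offset, bool isFloat) {
        if (isFloat)
            return (offset & 0xff) != offset;   // VFP: 8-bit offset field
        return (offset & 0xfff) != offset;      // integer: 12-bit offset field
    }

    // addrmode5 counts the VFP offset in words, hence the divide by 4.
    static int encodeOffset(int offset, bool isFloat) {
        assert(!needsLowering(offset, isFloat));
        return isFloat ? offset / 4 : offset;
    }

    int main() {
        std::printf("%d %d\n", needsLowering(4096, false), needsLowering(252, true));
        std::printf("%d\n", encodeOffset(252, true));  // 63 words
    }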
@@ -548,91 +915,162 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
StrOpc = ARM::VSTRD;
break;
}
-
- if (isThumb)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(StrOpc), SrcReg)
- .addReg(DstReg).addImm(Offset).addReg(0));
- else
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(StrOpc), SrcReg)
- .addReg(DstReg).addReg(0).addImm(Offset));
-
+ // Simplify this down to something we can handle.
+ ARMSimplifyAddress(Addr, VT);
+
+ // Create the base instruction, then add the operands.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg, getKillRegState(true));
+ AddLoadStoreOperands(VT, Addr, MIB);
return true;
}
-bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+bool ARMFastISel::SelectStore(const Instruction *I) {
Value *Op0 = I->getOperand(0);
unsigned SrcReg = 0;
- // Yay type legalization
- EVT VT;
+ // Verify we have a legal type before going any further.
+ MVT VT;
if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
return false;
// Get the value to be stored into a register.
SrcReg = getRegForValue(Op0);
- if (SrcReg == 0)
- return false;
-
- // If we're an alloca we know we have a frame index and can emit the store
- // quickly.
- if (ARMStoreAlloca(I, SrcReg))
- return true;
-
- // Our register and offset with innocuous defaults.
- unsigned Reg = 0;
- int Offset = 0;
-
- // See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
- return false;
-
- if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
-
- return false;
-
-}
+ if (SrcReg == 0) return false;
-bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
- // If we're an alloca we know we have a frame index and can emit the load
- // directly in short order.
- if (ARMLoadAlloca(I))
- return true;
-
- // Verify we have a legal type before going any further.
- EVT VT;
- if (!isLoadTypeLegal(I->getType(), VT))
- return false;
-
- // Our register and offset with innocuous defaults.
- unsigned Reg = 0;
- int Offset = 0;
-
- // See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
-
- unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
-
- UpdateValueMap(I, ResultReg);
+
+ if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
return true;
}
-bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
-
+
// Simple branch support.
- unsigned CondReg = getRegForValue(BI->getCondition());
- if (CondReg == 0) return false;
-
- unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+
+ // If we can, avoid recomputing the compare - redoing it could lead to wonky
+ // behavior.
+ // TODO: Factor this out.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
+ }
+
+ unsigned CmpReg = getRegForValue(BI->getCondition());
+ if (CmpReg == 0) return false;
+
+ // Re-set the flags just in case.
+ unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CondReg).addReg(CondReg));
+ .addReg(CmpReg).addImm(0));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
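Editorial note: getComparePred above collapses the IR integer and FP predicates onto ARM condition codes, and uses AL as a "not handled" marker for the predicates that would need two compares (FCMP_ONE, FCMP_UEQ). A compact stand-alone sketch of the same mapping idea, restricted to the integer predicates; the enums are illustrative stand-ins for the real CmpInst and ARMCC enumerations.

    #include <cstdio>

    // Toy model of the predicate-to-condition-code mapping: signed compares go
    // to GT/GE/LT/LE, unsigned compares to HI/HS/LO/LS, and anything else is
    // reported as AL, which SelectBranch/SelectCmp treat as "bail out".
    enum class IntPred { EQ, NE, SGT, SGE, SLT, SLE, UGT, UGE, ULT, ULE, Other };
    enum class CondCode { EQ, NE, GT, GE, LT, LE, HI, HS, LO, LS, AL };

    static CondCode getIntComparePred(IntPred p) {
        switch (p) {
        case IntPred::EQ:  return CondCode::EQ;
        case IntPred::NE:  return CondCode::NE;
        case IntPred::SGT: return CondCode::GT;
        case IntPred::SGE: return CondCode::GE;
        case IntPred::SLT: return CondCode::LT;
        case IntPred::SLE: return CondCode::LE;
        case IntPred::UGT: return CondCode::HI;
        case IntPred::UGE: return CondCode::HS;
        case IntPred::ULT: return CondCode::LO;
        case IntPred::ULE: return CondCode::LS;
        default:           return CondCode::AL;  // caller gives up on AL
        }
    }

    int main() {
        std::printf("%d\n", getIntComparePred(IntPred::UGT) == CondCode::HI);
        std::printf("%d\n", getIntComparePred(IntPred::Other) == CondCode::AL);
    }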
@@ -640,18 +1078,809 @@ bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
return true;
}
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+ // For floating point we need to move the result to a comparison register
+ // that we can then use for branches.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ // The conversion routine works on fp-reg to fp-reg and the operand above
+ // was an integer, move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return 0;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ MVT DstVT;
+ const Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ const Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return 0;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ // If we have integer div support we should have selected this automagically.
+ // In case we have a real miss go ahead and return false and we'll pick
+ // it up later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ MVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ const Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT == MVT::f64 || VT == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
+ /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ Address Addr;
+ Addr.BaseType = Address::RegBase;
+ Addr.Base.Reg = ARM::SP;
+ Addr.Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ }
+ }
+ return true;
+}
+
+bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
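+ // (Typically the two halves come back in r0/r1 under the soft-float return
+ // convention, and the VMOVDRR below reassembles them into a single D register.)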
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+// A helper that emits a call to the named libcall for the Instruction I,
+// passing I's operands as the call arguments. We can assume that we
+// can emit a call for any libcall we can produce. This is an abridged version
+// of the full call infrastructure since we won't need to worry about things
+// like computed function pointers or strange arguments at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ const Type *ArgTy = Op->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call: BLr9/tBLXi_r9 for Darwin, BL/tBLXi otherwise. This uses v5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ if(isThumb) {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+ } else {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call)));
+ }
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees that are direct calls.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ // TODO: Maybe?
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // TODO: For now if we have long calls specified we don't handle the call.
+ if (EnableARMLongCalls) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+ // Issue the call: BLr9/tBLXi_r9 for Darwin, BL/tBLXi otherwise. This uses v5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ // Explicitly adding the predicate here.
+ if(isThumb) {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc)))
+ .addGlobalAddress(GV, 0, 0);
+ } else {
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+ }
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
- // No Thumb-1 for now.
- if (isThumb && !AFI->isThumb2Function()) return false;
-
+
switch (I->getOpcode()) {
case Instruction::Load:
- return ARMSelectLoad(I);
+ return SelectLoad(I);
case Instruction::Store:
- return ARMSelectStore(I);
+ return SelectStore(I);
case Instruction::Br:
- return ARMSelectBranch(I);
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
default: break;
}
return false;
@@ -659,7 +1888,14 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
namespace llvm {
llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
- if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+
+ // Darwin only for now; Thumb1 is not supported.
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
return 0;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
new file mode 100644
index 0000000..3d175e3
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFixupKinds.h
@@ -0,0 +1,97 @@
+//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARM_ARMFIXUPKINDS_H
+#define LLVM_ARM_ARMFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace ARM {
+enum Fixups {
+ // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol
+ // addresses
+ fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind,
+
+ // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with
+ // the 16-bit halfwords reordered.
+ fixup_t2_ldst_pcrel_12,
+
+ // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
+ // used in VFP instructions where the lower 2 bits are not encoded
+ // (so it's encoded as an 8-bit immediate).
+ fixup_arm_pcrel_10,
+ // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for
+ // the short-swapped encoding of Thumb2 instructions.
+ fixup_t2_pcrel_10,
+ // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol
+ // addresses where the lower 2 bits are not encoded (so it's encoded as an
+ // 8-bit immediate).
+ fixup_thumb_adr_pcrel_10,
+ // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_arm_adr_pcrel_12,
+ // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR
+ // instruction.
+ fixup_t2_adr_pcrel_12,
+ // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch
+ // instructions.
+ fixup_arm_condbranch,
+ // fixup_arm_uncondbranch - 24-bit PC relative relocation for unconditional
+ // branch instructions.
+ fixup_arm_uncondbranch,
+ // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct
+ // conditional branch instructions.
+ fixup_t2_condbranch,
+ // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct
+ // unconditional branch instructions.
+ fixup_t2_uncondbranch,
+
+ // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
+ fixup_arm_thumb_br,
+
+ // fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
+ fixup_arm_thumb_bl,
+
+ // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions.
+ fixup_arm_thumb_blx,
+
+ // fixup_arm_thumb_cb - Fixup for Thumb compare-and-branch (CBZ/CBNZ) instructions.
+ fixup_arm_thumb_cb,
+
+ // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs.
+ fixup_arm_thumb_cp,
+
+ // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions.
+ fixup_arm_thumb_bcc,
+
+ // The following fixups are for the movt/movw pair; the 16-bit imm field is
+ // split into imm{15-12} and imm{11-0}.
+ fixup_arm_movt_hi16, // :upper16:
+ fixup_arm_movw_lo16, // :lower16:
+ fixup_t2_movt_hi16, // :upper16:
+ fixup_t2_movw_lo16, // :lower16:
+
+ // It is possible to create an "immediate" that happens to be pcrel.
+ // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8)
+ // result in different reloc tags than the absolute variants above.
+ // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC.
+ fixup_arm_movt_hi16_pcrel, // :upper16:
+ fixup_arm_movw_lo16_pcrel, // :lower16:
+ fixup_t2_movt_hi16_pcrel, // :upper16:
+ fixup_t2_movw_lo16_pcrel, // :lower16:
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameInfo.h b/contrib/llvm/lib/Target/ARM/ARMFrameInfo.h
deleted file mode 100644
index d5dae24..0000000
--- a/contrib/llvm/lib/Target/ARM/ARMFrameInfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_FRAMEINFO_H
-#define ARM_FRAMEINFO_H
-
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/Target/TargetFrameInfo.h"
-
-namespace llvm {
-
-class ARMFrameInfo : public TargetFrameInfo {
-public:
- explicit ARMFrameInfo(const ARMSubtarget &ST)
- : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) {
- }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
new file mode 100644
index 0000000..f42c6db
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -0,0 +1,1021 @@
+//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMFrameLowering.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetDarwin())
+ return true;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
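+/// For illustration (schematic, not actual emitted code): without a reserved
+/// call frame, each call site is bracketed by its own SP adjustment, roughly
+///   sub sp, sp, #NumBytes   ; CALLSEQ_START
+///   ... store outgoing args, bl callee ...
+///   add sp, sp, #NumBytes   ; CALLSEQ_END
+/// With a reserved call frame that space is instead folded into the prologue's
+/// single SP adjustment.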
+bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified. Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+ const ARMBaseInstrInfo &TII,
+ const unsigned *CSRegs) {
+ // Integer spill area is handled with "pop".
+ if (MI->getOpcode() == ARM::LDMIA_RET ||
+ MI->getOpcode() == ARM::t2LDMIA_RET ||
+ MI->getOpcode() == ARM::LDMIA_UPD ||
+ MI->getOpcode() == ARM::t2LDMIA_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ if ((MI->getOpcode() == ARM::LDR_POST ||
+ MI->getOpcode() == ARM::t2LDR_POST) &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
+ MI->getOperand(1).getReg() == ARM::SP)
+ return true;
+
+ return false;
+}
+
+static void
+emitSPUpdate(bool isARM,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, const ARMBaseInstrInfo &TII,
+ int NumBytes,
+ ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII);
+}
+
+void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitPrologue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which callee-save spill area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ // Allocate the vararg register save area. This is not counted in NumBytes.
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ // Move past area 1.
+ if (GPRCS1Size > 0) MBBI++;
+
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For Darwin, FP is R7, which has now been stored in spill area 1.
+ // On non-Darwin targets, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it is
+ // now safe to emit this assignment.
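+ // (Schematically, the ADDri/t2ADDri built below becomes something like
+ // "add <fp>, sp, #<offset of the FP spill slot>" once frame indices are
+ // eliminated.)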
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ // Move past area 2.
+ if (GPRCS2Size > 0) MBBI++;
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ if (HasFP)
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ // Move past area 3.
+ if (DPRCSSize > 0) MBBI++;
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Adjust SP after all the callee-save spills.
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP && isARM)
+ // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
+ // Note it's not safe to do this in Thumb2 mode because it would have
+ // taken two instructions:
+ // mov sp, r7
+ // sub sp, #24
+ // If an interrupt is taken between the two instructions, then sp is in
+ // an inconsistent state (pointing to the middle of callee-saved area).
+ // The interrupt handler can end up clobbering the registers.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
+ if (RegInfo->needsStackRealignment(MF)) {
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert (!AFI->isThumb1OnlyFunction());
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It would be better just to find a spare register here.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill);
+ }
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP);
+ }
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This emitEpilogue does not support Thumb1!");
+ bool isARM = !AFI->isThumbFunction();
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ if (!isCSRestore(MBBI, TII, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ // Reset SP based on the frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else {
+ // It's not possible to restore SP from FP in a single instruction.
+ // For Darwin, this looks like:
+ // mov sp, r7
+ // sub sp, #24
+ // This is bad, if an interrupt is taken after the mov, sp is in an
+ // inconsistent state.
+ // Use the first callee-saved register as a scratch register.
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ }
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+
+ // Increment past our save areas.
+ if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
+ if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ }
+
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
+ unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
+ ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+}
+
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int
+ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
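+ // Resolution order, summarizing the logic below: dedicated callee-saved
+ // spill areas first; then, when the stack is realigned, FP for fixed objects
+ // and the base pointer for locals; then the frame pointer when it is
+ // available and the offset is usable; finally the base pointer or SP.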
+ FrameReg = ARM::SP;
+ Offset += SPAdj;
+ if (AFI->isGPRCalleeSavedArea1Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FI))
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FI))
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ if (isFixed) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = RegInfo->getBaseRegister();
+ }
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() &&
+ !RegInfo->hasBasePointer(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
+ if (AFI->isThumb2Function()) {
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else
+ FrameReg = RegInfo->getBaseRegister();
+ } else if (AFI->isThumb2Function()) {
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references.
+ if (FPOffset >= -255 && FPOffset < 0) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+ }
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ return Offset;
+}
+
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
+
+void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned StmOpc, unsigned StrOpc,
+ bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for
+ // @llvm.returnaddress then it's already added to the function and
+ // entry block live-in sets.
+ bool isKill = true;
+ if (Reg == ARM::LR) {
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // If NoGap is true, push consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+ LastReg = Reg;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || StrOpc== 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
+ } else if (Regs.size() == 1) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
+ ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (StrOpc == ARM::STR_PRE) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(-4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned LdmOpc, unsigned LdrOpc,
+ bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+ RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri ||
+ RetOpcode == ARM::TCRETURNriND);
+
+ SmallVector<unsigned, 4> Regs;
+ unsigned i = CSI.size();
+ while (i != 0) {
+ unsigned LastReg = 0;
+ bool DeleteRet = false;
+ for (; i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ Reg = ARM::PC;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ // Fold the return instruction into the LDM.
+ DeleteRet = true;
+ }
+
+ // If NoGap is true, pop consecutive registers and then leave the rest
+ // for other instructions. e.g.
+ // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
+ if (NoGap && LastReg && LastReg != Reg-1)
+ break;
+
+ LastReg = Reg;
+ Regs.push_back(Reg);
+ }
+
+ if (Regs.empty())
+ continue;
+ if (Regs.size() > 1 || LdrOpc == 0) {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP));
+ for (unsigned i = 0, e = Regs.size(); i < e; ++i)
+ MIB.addReg(Regs[i], getDefRegState(true));
+ if (DeleteRet)
+ MI->eraseFromParent();
+ MI = MIB;
+ } else if (Regs.size() == 1) {
+ // If we adjusted the reg to PC from LR above, switch it back here. We
+ // only do that for LDM.
+ if (Regs[0] == ARM::PC)
+ Regs[0] = ARM::LR;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP);
+ // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
+ // that refactoring is complete (eventually).
+ if (LdrOpc == ARM::LDR_POST) {
+ MIB.addReg(0);
+ MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
+ } else
+ MIB.addImm(4);
+ AddDefaultPred(MIB);
+ }
+ Regs.clear();
+ }
+}
+
+bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
+ unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+ unsigned FltOpc = ARM::VSTMDDB_UPD;
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+
+ return true;
+}
+
+bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+
+ unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
+ unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+ unsigned FltOpc = ARM::VLDMDIA_UPD;
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea2Register);
+ emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
+ &isARMArea1Register);
+
+ return true;
+}
+
+// FIXME: Make generic?
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+/// FIXME: Make generic?
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
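+ // (e.g. an Offset of 13 with an Align of 8 rounds up to 16)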
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+ const TargetFrameLowering *TFI) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
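+ // Start from the 12-bit immediate limit (4095) and tighten it below based
+ // on the addressing modes the function actually uses.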
+ unsigned Limit = (1 << 12) - 1;
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
+ Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+ break;
+ case ARMII::AddrModeT2_i12:
+ // i12 supports only positive offsets, so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (TFI->hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
+ // Instructions using addressing modes 4 & 6 (load/store) can't encode an
+ // immediate offset for stack references.
+ return 0;
+ default:
+ break;
+ }
+ break; // At most one FI per instruction
+ }
+ }
+ }
+
+ return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // Spill R4 if a Thumb2 function requires stack realignment - it will be used
+ // as a scratch register. Also spill R4 if a Thumb2 function has variable-sized
+ // objects, since it's not always possible to restore sp from fp in a single
+ // instruction.
+ // FIXME: It would be better just to find a spare register here.
+ if (AFI->isThumb2Function() &&
+ (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // Spill LR if Thumb1 function uses variable length argument lists.
+ if (AFI->getVarArgsRegSaveSize() > 0)
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+ // Spill R4 if the Thumb1 epilogue has to restore SP from FP.
+ // FIXME: It would be better just to find a spare register here.
+ if (MFI->hasVarSizedObjects())
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+ }
+
+ // Spill the BasePtr if it's used.
+ if (RegInfo->hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases =
+ RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
+
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4: case ARM::R5:
+ case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // the use of BL to implement a far jump. If it turns out that it's not
+ // needed, the branch fixup path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF, this)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+ bool ExtraCSSpill = false;
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ if (hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If the stack and doubles are 8-byte aligned and we are spilling an odd
+ // number of GPRs, spill one extra callee-saved GPR so we won't have to pad
+ // between the integer and double callee-save areas.
+ unsigned TargetAlign = getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill high register if the function is thumb1
+ if (!AFI->isThumb1OnlyFunction() ||
+ isARMLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ if (!RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. Thumb1 needs a spill slot for stack pointer
+ // adjustments also, even when the frame itself is small.
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!RegInfo->isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ }
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
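
A minimal standalone sketch (separate from the patch, with illustrative constants) of the padding arithmetic behind the "spill one extra callee-saved GPR" heuristic above: with 8-byte stack alignment, an odd count of 4-byte GPR spills would otherwise leave 4 bytes of padding before the 8-byte D-register save area.

#include <cstdio>

int main() {
  const unsigned StackAlign = 8;          // bytes
  for (unsigned NumGPRSpills = 3; NumGPRSpills <= 4; ++NumGPRSpills) {
    unsigned GPRArea = NumGPRSpills * 4;  // each GPR spill slot is 4 bytes
    unsigned Padding = (StackAlign - GPRArea % StackAlign) % StackAlign;
    std::printf("%u GPRs spilled -> %u bytes of padding before D regs\n",
                NumGPRSpills, Padding);
  }
  return 0;                               // 3 GPRs -> 4 bytes, 4 GPRs -> 0
}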
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
new file mode 100644
index 0000000..1288b70
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -0,0 +1,74 @@
+//===-- ARMFrameLowering.h - Define frame lowering for ARM -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+ const ARMSubtarget &STI;
+
+public:
+ explicit ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ private:
+ void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+ unsigned StrOpc, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+ void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+ unsigned LdrOpc, bool isVarArg, bool NoGap,
+ bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
index 85b0c6c..3f02383 100644
--- a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -12,7 +12,8 @@
// global). Such a transformation can significantly reduce the register pressure
// when many globals are involved.
//
-// For example, consider the code which touches several global variables at once:
+// For example, consider the code which touches several global variables at
+// once:
//
// static int foo[N], bar[N], baz[N];
//
@@ -48,7 +49,7 @@
// str r0, [r5], #4
//
// note that we saved 2 registers here almost "for free".
-// ===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-global-merge"
#include "ARM.h"
@@ -64,16 +65,17 @@
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
namespace {
- class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+ class ARMGlobalMerge : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
const TargetLowering *TLI;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool) const;
+ Module &M, bool isConst) const;
public:
static char ID; // Pass identification, replacement for typeid.
@@ -81,7 +83,7 @@ namespace {
: FunctionPass(ID), TLI(tli) {}
virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function& F);
+ virtual bool runOnFunction(Function &F);
const char *getPassName() const {
return "Merge internal globals";
@@ -95,13 +97,11 @@ namespace {
struct GlobalCmp {
const TargetData *TD;
- GlobalCmp(const TargetData *td):
- TD(td) { }
+ GlobalCmp(const TargetData *td) : TD(td) { }
- bool operator() (const GlobalVariable* GV1,
- const GlobalVariable* GV2) {
- const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
}
@@ -130,27 +130,27 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
uint64_t MergedSize = 0;
std::vector<const Type*> Tys;
std::vector<Constant*> Inits;
- for (j = i; MergedSize < MaxOffset && j != e; ++j) {
- const Type* Ty = Globals[j]->getType()->getElementType();
+ for (j = i; j != e; ++j) {
+ const Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
- MergedSize += TD->getTypeAllocSize(Ty);
}
- StructType* MergedTy = StructType::get(M.getContext(), Tys);
- Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
- MergedInit, "merged");
+ MergedInit, "_MergedGlobals");
for (size_t k = i; k < j; ++k) {
- SmallVector<Constant*, 2> Idx;
- Idx.push_back(ConstantInt::get(Int32Ty, 0));
- Idx.push_back(ConstantInt::get(Int32Ty, k-i));
-
- Constant* GEP =
- ConstantExpr::getInBoundsGetElementPtr(MergedGV,
- &Idx[0], Idx.size());
-
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
}
@@ -161,8 +161,8 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
}
-bool ARMGlobalMerge::doInitialization(Module& M) {
- SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+bool ARMGlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
const TargetData *TD = TLI->getTargetData();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
@@ -183,8 +183,11 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
I->getName().startswith(".llvm."))
continue;
- if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
- if (I->isConstant())
+ if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+ const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+ if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
ConstGlobals.push_back(I);
else
Globals.push_back(I);
@@ -193,17 +196,19 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
if (Globals.size() > 1)
Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
// FIXME: This currently breaks the EH processing due to way how the
// typeinfo detection works. We might want to detect the TIs and ignore
// them in the future.
-
// if (ConstGlobals.size() > 1)
// Changed |= doMerge(ConstGlobals, M, true);
return Changed;
}
-bool ARMGlobalMerge::runOnFunction(Function& F) {
+bool ARMGlobalMerge::runOnFunction(Function &F) {
return false;
}
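
A minimal sketch (separate from the patch, with made-up sizes and limit) of the grouping loop as changed above: the running size is now bumped before the MaxOffset test, so a merged struct never exceeds the limit.

#include <cstdio>
#include <vector>

int main() {
  const unsigned MaxOffset = 16;                 // hypothetical limit, in bytes
  std::vector<unsigned> GlobalSizes = {4, 8, 4, 8, 4};
  unsigned MergedSize = 0, Count = 0;
  for (unsigned Size : GlobalSizes) {
    MergedSize += Size;
    if (MergedSize > MaxOffset)
      break;                                     // this global starts the next group
    ++Count;
  }
  std::printf("first merged group holds %u globals\n", Count);  // prints 3
  return 0;
}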
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 0000000..676b01e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,121 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+ const TargetRegisterInfo &TRI) {
+ // FIXME: Detect integer instructions properly.
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainVFP) {
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+ Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ } else if (Domain == ARMII::DomainNEON) {
+ if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
+ return false;
+ } else
+ return false;
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (!MI->isDebugValue()) {
+ if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+
+ // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+ // a VMLA / VMLS will cause a 4-cycle stall.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ MachineInstr *DefMI = LastMI;
+ const TargetInstrDesc &LastTID = LastMI->getDesc();
+ // Skip over one non-VFP / NEON instruction.
+ if (!LastTID.isBarrier() &&
+ (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ MachineBasicBlock::iterator I = LastMI;
+ if (I != LastMI->getParent()->begin()) {
+ I = llvm::prior(I);
+ DefMI = &*I;
+ }
+ }
+
+ if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+ (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+ hasRAWHazard(DefMI, MI, TRI))) {
+ // Try to schedule another instruction for the next 4 cycles.
+ if (FpMLxStalls == 0)
+ FpMLxStalls = 4;
+ return Hazard;
+ }
+ }
+ }
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+ LastMI = 0;
+ FpMLxStalls = 0;
+ ITBlockSize = 0;
+ ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ if (!MI->isDebugValue()) {
+ LastMI = MI;
+ FpMLxStalls = 0;
+ }
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
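
A small worked example (not LLVM code) of the IT-block size computation in EmitInstruction above, where the block length is four minus the number of trailing zero bits in the t2IT mask operand.

#include <cstdio>

static unsigned countTrailingZeros(unsigned Mask) {
  unsigned N = 0;
  while (Mask && !(Mask & 1)) { Mask >>= 1; ++N; }
  return N;
}

int main() {
  // Example mask values (low 4 bits): the lower the lowest set bit,
  // the more instructions the IT block covers.
  unsigned Masks[] = {0x8, 0x4, 0x2, 0x1};
  for (unsigned Mask : Masks)
    std::printf("mask 0x%x -> IT block of %u instruction(s)\n",
                Mask, 4 - countTrailingZeros(Mask));
  return 0;   // prints 1, 2, 3, 4
}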
+
+void ARMHazardRecognizer::AdvanceCycle() {
+ if (FpMLxStalls && --FpMLxStalls == 0)
+ // Stalled for 4 cycles but still can't schedule any other instructions.
+ LastMI = 0;
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("reverse ARM hazard checking unsupported");
+}
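
A toy model (not LLVM code, heavily simplified) of how the FpMLxStalls counter behaves across getHazardType/AdvanceCycle: once a VMLA-class instruction triggers the hazard, subsequent candidates are held off for four cycles.

#include <cstdio>

struct ToyHazardModel {
  unsigned FpMLxStalls = 0;
  void emitMLx()      { FpMLxStalls = 4; }           // a VMLA / VMLS was issued
  void advanceCycle() { if (FpMLxStalls) --FpMLxStalls; }
  bool isHazard() const { return FpMLxStalls != 0; }  // would a VMUL stall now?
};

int main() {
  ToyHazardModel HR;
  HR.emitMLx();
  for (int Cycle = 1; Cycle <= 5; ++Cycle) {
    std::printf("cycle %d: hazard=%d\n", Cycle, HR.isHazard());
    HR.advanceCycle();
  }
  return 0;   // hazard reported for cycles 1-4, clear on cycle 5
}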
diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 0000000..2bc218d
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMSubtarget &STI;
+
+ MachineInstr *LastMI;
+ unsigned FpMLxStalls;
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ ARMHazardRecognizer(const InstrItineraryData *ItinData,
+ const ARMBaseInstrInfo &tii,
+ const ARMBaseRegisterInfo &tri,
+ const ARMSubtarget &sti,
+ const ScheduleDAG *DAG) :
+ ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
+ TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void RecedeCycle();
+};
+
+} // end namespace llvm
+
+#endif // ARMHAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 51a30c1..a506cff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
#include "llvm/CallingConv.h"
@@ -41,13 +42,25 @@ DisableShifterOp("disable-shifter-op", cl::Hidden,
cl::desc("Disable isel of shifter-op"),
cl::init(false));
+static cl::opt<bool>
+CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
+ cl::desc("Check fp vmla / vmls hazard at isel time"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
class ARMDAGToDAGISel : public SelectionDAGISel {
ARMBaseTargetMachine &TM;
+ const ARMBaseInstrInfo *TII;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -57,7 +70,8 @@ public:
explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), TM(tm),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
virtual const char *getPassName() const {
@@ -72,60 +86,101 @@ public:
SDNode *Select(SDNode *N);
- bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
+
+ bool hasNoVMLxHazardUse(SDNode *N) const;
+ bool isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
+ bool SelectShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
- bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Offset, SDValue &Opc);
+ bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+// return SelectAddrMode2ShOp(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr,
- SDValue &Mode);
- bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
-
- bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
- SDValue &Label);
-
- bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Offset);
- bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale,
- SDValue &Base, SDValue &OffImm,
- SDValue &Offset);
- bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
-
- bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
+ bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+
+ bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
+
+ // Thumb Addressing Modes:
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
+ unsigned Scale);
+ bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
+
+ // Thumb 2 Addressing Modes:
+ bool SelectT2ShifterOperandReg(SDValue N,
SDValue &BaseReg, SDValue &Opc);
- bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
- bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
- bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
- bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool is_so_imm(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }
+
+ inline bool is_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getSOImmVal(~Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }
+
+ inline bool is_t2_so_imm_not(unsigned Imm) const {
+ return ARM_AM::getT2SOImmVal(~Imm) != -1;
+ }
+
inline bool Pred_so_imm(SDNode *inN) const {
ConstantSDNode *N = cast<ConstantSDNode>(inN);
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ return is_so_imm(N->getZExtValue());
}
inline bool Pred_t2_so_imm(SDNode *inN) const {
ConstantSDNode *N = cast<ConstantSDNode>(inN);
- return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ return is_t2_so_imm(N->getZExtValue());
}
// Include the pieces autogenerated from the target description.
@@ -141,22 +196,30 @@ private:
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
/// For NumVecs <= 2, QOpcodes1 is not used.
- SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
- /// load/store of D registers and even subregs and odd subregs of Q registers.
- SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1);
+ /// load/store of D registers and Q registers.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes);
+
+ /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
+ /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// for loading D registers. (Q registers are not supported.)
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ unsigned *Opcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -174,10 +237,10 @@ private:
SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -199,9 +262,8 @@ private:
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- // Form sequences of 8 consecutive D registers.
- SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
- SDValue V4, SDValue V5, SDValue V6, SDValue V7);
+ // Get the alignment operand for a NEON VLD or VST instruction.
+ SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
};
}
@@ -229,9 +291,85 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale), where N is in the half-open range [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+ int RangeMin, int RangeMax,
+ int &ScaledConstant) {
+ assert(Scale && "Invalid scale!");
+
+ // Check that this is a constant.
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+ if (!C)
+ return false;
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
- SDValue N,
+ ScaledConstant = (int) C->getZExtValue();
+ if ((ScaledConstant % Scale) != 0)
+ return false;
+
+ ScaledConstant /= Scale;
+ return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
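
A plain-integer sketch (not LLVM code) of the check isScaledConstantInRange performs, handy for sanity-checking the Scale and range arguments the addressing-mode selectors below pass in.

#include <cstdio>

static bool isScaledInRange(int C, unsigned Scale, int RangeMin, int RangeMax,
                            int &Scaled) {
  if (C % (int)Scale != 0)
    return false;                       // must be an exact multiple of Scale
  Scaled = C / (int)Scale;
  return Scaled >= RangeMin && Scaled < RangeMax;   // half-open range
}

int main() {
  int Scaled;
  // Thumb imm5 * 4 offsets: multiples of 4 from 0 to 124.
  std::printf("%d\n", isScaledInRange(20, 4, 0, 32, Scaled));  // 1 (Scaled = 5)
  std::printf("%d\n", isScaledInRange(22, 4, 0, 32, Scaled));  // 0 (not a multiple)
  std::printf("%d\n", isScaledInRange(128, 4, 0, 32, Scaled)); // 0 (32 is out of range)
  return 0;
}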
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoided.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+ if (OptLevel == CodeGenOpt::None)
+ return true;
+
+ if (!CheckVMLxHazard)
+ return true;
+
+ if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+ return true;
+
+ if (!N->hasOneUse())
+ return false;
+
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg)
+ return true;
+ if (Use->isMachineOpcode()) {
+ const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
+ if (TID.mayStore())
+ return true;
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return true;
+ // vmlx feeding into another vmlx. We actually want to unfold
+ // the use later in the MLxExpansion pass. e.g.
+ // vmla
+ // vmla (stall 8 cycles)
+ //
+ // vmul (5 cycles)
+ // vadd (5 cycles)
+ // vmla
+ // This adds up to about 18 - 19 cycles.
+ //
+ // vmla
+ // vmul (stall 4 cycles)
+ // vadd
+ // This adds up to about 14 cycles.
+ return TII->isFpMLxInstruction(Opcode);
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+ ARM_AM::ShiftOpc ShOpcVal,
+ unsigned ShAmt) {
+ if (!Subtarget->isCortexA9())
+ return true;
+ if (Shift.hasOneUse())
+ return true;
+ // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
@@ -251,16 +389,92 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
ShImmVal = RHS->getZExtValue() & 31;
} else {
ShReg = N.getOperand(1);
+ if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ return false;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &Offset,
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ // Do not check isShifterOpProfitable. This must return true.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
- if (N.getOpcode() == ISD::MUL) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
// X * [3,5,9] -> X + X * [2,4,8] etc.
int RHSC = (int)RHS->getZExtValue();
@@ -283,7 +497,114 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
}
}
- if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ISD::ADD.
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) // 12 bits.
+ return false;
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse())
+ // Compute R +/- (R << N) and reuse it.
+ return false;
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
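
A worked example (not LLVM code) of the "X * [3,5,9] -> X + X * [2,4,8]" rewrite in SelectLdStSOReg above: a multiply by 2^n + 1 becomes a base register plus the same register shifted left by n, which addrmode2 can encode directly.

#include <cstdio>

int main() {
  unsigned X = 7;
  unsigned Times5 = X + (X << 2);   // X * 5 rewritten as X + (X * 4)
  unsigned Times9 = X + (X << 3);   // X * 9 rewritten as X + (X * 8)
  std::printf("%u %u\n", Times5, Times9);   // prints 35 63
  return 0;
}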
+
+
+
+
+//-----
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL &&
+ (!Subtarget->isCortexA9() || N.hasOneUse())) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ // ISD::OR that is equivalent to an ADD.
+ !CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -297,36 +618,45 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
- return true;
+ return AM2_BASE;
}
// Match simple R +/- imm12 operands.
- if (N.getOpcode() == ISD::ADD)
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if ((RHSC >= 0 && RHSC < 0x1000) ||
- (RHSC < 0 && RHSC > -0x1000)) { // 12 bits.
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- }
- Offset = CurDAG->getRegister(0, MVT::i32);
+ if (N.getOpcode() != ISD::SUB) {
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -0x1000+1, 0x1000, RHSC)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- if (RHSC < 0) {
- AddSub = ARM_AM::sub;
- RHSC = - RHSC;
- }
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
- ARM_AM::no_shift),
- MVT::i32);
- return true;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
}
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
}
+ }
+
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R +/- (R << N) and reuse it.
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return AM2_BASE;
+ }
// Otherwise this is R +/- [possibly shifted] R.
- ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
unsigned ShAmt = 0;
@@ -339,14 +669,20 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(1).getOperand(0);
+ if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+ Offset = N.getOperand(1).getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
}
// Try matching (R shl C) + (R).
- if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
+ !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
if (ShOpcVal != ARM_AM::no_shift) {
// Check to see if the RHS of the shift is a constant, if not, we can't
@@ -354,8 +690,15 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
if (ConstantSDNode *Sh =
dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(0).getOperand(0);
- Base = N.getOperand(1);
+ if (!Subtarget->isCortexA9() ||
+ (N.hasOneUse() &&
+ isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
@@ -364,7 +707,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
- return true;
+ return AM2_SHOP;
}
bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
@@ -375,15 +718,13 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- int Val = (int)C->getZExtValue();
- if (Val >= 0 && Val < 0x1000) { // 12 bits.
- Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
- ARM_AM::no_shift),
- MVT::i32);
- return true;
- }
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
}
Offset = N;
@@ -394,7 +735,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
ShAmt = Sh->getZExtValue();
- Offset = N.getOperand(0);
+ if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+ Offset = N.getOperand(0);
+ else {
+ ShAmt = 0;
+ ShOpcVal = ARM_AM::no_shift;
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
@@ -406,7 +752,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
}
-bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::SUB) {
@@ -417,7 +763,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
return true;
}
- if (N.getOpcode() != ISD::ADD) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -429,25 +775,23 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
}
// If the RHS is +/- imm8, fold into addr mode.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if ((RHSC >= 0 && RHSC < 256) ||
- (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- }
- Offset = CurDAG->getRegister(0, MVT::i32);
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+ -256 + 1, 256, RHSC)) { // 8 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- if (RHSC < 0) {
- AddSub = ARM_AM::sub;
- RHSC = - RHSC;
- }
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
- return true;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
}
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
}
Base = N.getOperand(0);
@@ -464,13 +808,11 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
: cast<StoreSDNode>(Op)->getAddressingMode();
ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
? ARM_AM::add : ARM_AM::sub;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- int Val = (int)C->getZExtValue();
- if (Val >= 0 && Val < 256) {
- Offset = CurDAG->getRegister(0, MVT::i32);
- Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
- return true;
- }
+ int Val;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
}
Offset = N;
@@ -478,16 +820,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Mode) {
- Addr = N;
- Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
- return true;
-}
-
-bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
SDValue &Base, SDValue &Offset) {
- if (N.getOpcode() != ISD::ADD) {
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -503,28 +838,23 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
}
// If the RHS is +/- imm8, fold into addr mode.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4.
- RHSC >>= 2;
- if ((RHSC >= 0 && RHSC < 256) ||
- (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- }
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
+ -256 + 1, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- if (RHSC < 0) {
- AddSub = ARM_AM::sub;
- RHSC = - RHSC;
- }
- Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
- MVT::i32);
- return true;
- }
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
}
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
}
Base = N;
@@ -533,30 +863,50 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Align) {
+bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
+ SDValue &Align) {
Addr = N;
- // Default to no alignment.
- Align = CurDAG->getTargetConstant(0, MVT::i32);
+
+ unsigned Alignment = 0;
+ if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+ // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
+ // The maximum alignment is equal to the memory size being referenced.
+ unsigned LSNAlign = LSN->getAlignment();
+ unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
+ if (LSNAlign > MemSize && MemSize > 1)
+ Alignment = MemSize;
+ } else {
+ // All other uses of addrmode6 are for intrinsics. For now just record
+ // the raw alignment value; it will be refined later based on the legal
+ // alignment operands for the intrinsic.
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+ }
+
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
return true;
}
+
return false;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
+
+//===----------------------------------------------------------------------===//
+// Thumb Addressing Modes
+//===----------------------------------------------------------------------===//
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
SDValue &Base, SDValue &Offset){
- // FIXME dl should come from the parent load or store, not the address
- if (N.getOpcode() != ISD::ADD) {
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
if (!NC || !NC->isNullValue())
return false;
@@ -571,82 +921,137 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N,
- unsigned Scale, SDValue &Base,
- SDValue &OffImm, SDValue &Offset) {
+ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
+ SDValue &Offset, unsigned Scale) {
if (Scale == 4) {
SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
return false; // We want to select tLDRspi / tSTRspi instead.
+
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
return false; // We want to select tLDRpci instead.
}
- if (N.getOpcode() != ISD::ADD) {
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP))
+ return false;
+
+ // FIXME: Why do we explicitly check for a match here and then return false?
+ // Presumably to allow something else to match, but shouldn't this be
+ // documented?
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
+ return false;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 1);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 2);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
+ SDValue &Base,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI(N, Base, Offset, 4);
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
- } else
+ } else {
Base = N;
+ }
- Offset = CurDAG->getRegister(0, MVT::i32);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
- // Thumb does not have [sp, r] address mode.
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
if ((LHSR && LHSR->getReg() == ARM::SP) ||
(RHSR && RHSR->getReg() == ARM::SP)) {
+ ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
+ unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
+
+ // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
+ if (LHSC != 0 || RHSC != 0) return false;
+
Base = N;
- Offset = CurDAG->getRegister(0, MVT::i32);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
// If the RHS is + imm5 * scale, fold into addr mode.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied.
- RHSC /= Scale;
- if (RHSC >= 0 && RHSC < 32) {
- Base = N.getOperand(0);
- Offset = CurDAG->getRegister(0, MVT::i32);
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
- }
- }
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
+ Base = N.getOperand(0);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
}
Base = N.getOperand(0);
- Offset = N.getOperand(1);
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm,
- SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm,
- SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm,
- SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
+ SDValue &OffImm) {
+ return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm) {
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
@@ -654,35 +1059,35 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
return true;
}
- if (N.getOpcode() != ISD::ADD)
+ if (!CurDAG->isBaseWithConstantOffset(N))
return false;
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
(LHSR && LHSR->getReg() == ARM::SP)) {
// If the RHS is + imm8 * scale, fold into addr mode.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- if ((RHSC & 3) == 0) { // The constant is implicitly multiplied.
- RHSC >>= 2;
- if (RHSC >= 0 && RHSC < 256) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- }
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
- }
+ int RHSC;
+ if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
}
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
}
}
return false;
}
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
- SDValue &BaseReg,
+
+//===----------------------------------------------------------------------===//
+// Thumb 2 Addressing Modes
+//===----------------------------------------------------------------------===//
+
+
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
SDValue &Opc) {
if (DisableShifterOp)
return false;
@@ -704,19 +1109,22 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
// Base only.
- if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::FrameIndex) {
- // Match frame index...
+ // Match frame index.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
- } else if (N.getOpcode() == ARMISD::Wrapper &&
+ }
+
+ if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
Base = N.getOperand(0);
@@ -729,7 +1137,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- if (SelectT2AddrModeImm8(Op, N, Base, OffImm))
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
// Let t2LDRi8 handle (R - imm8).
return false;
@@ -754,24 +1162,26 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
- if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getSExtValue();
- if (N.getOpcode() == ISD::SUB)
- RHSC = -RHSC;
-
- if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
- }
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N))
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getSExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
}
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
}
}
@@ -784,52 +1194,22 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
? cast<LoadSDNode>(Op)->getAddressingMode()
: cast<StoreSDNode>(Op)->getAddressingMode();
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) {
- int RHSC = (int)RHS->getZExtValue();
- if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
- OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
- ? CurDAG->getTargetConstant(RHSC, MVT::i32)
- : CurDAG->getTargetConstant(-RHSC, MVT::i32);
- return true;
- }
- }
-
- return false;
-}
-
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm) {
- if (N.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- // 8 bits.
- if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
- }
- }
- } else if (N.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- // 8 bits.
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
- return true;
- }
- }
+ int RHSC;
+ if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
+ OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
+ ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+ : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+ return true;
}
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
- if (N.getOpcode() != ISD::ADD)
+ if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
return false;
// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
@@ -841,6 +1221,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
return false;
}
+ if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+ // Compute R + (R << [1,2,3]) and reuse it.
+ Base = N;
+ return false;
+ }
+
// Look for (R + R) or (R + (R << [1,2,3])).
unsigned ShAmt = 0;
Base = N.getOperand(0);
@@ -859,11 +1245,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
// it.
if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
ShAmt = Sh->getZExtValue();
- if (ShAmt >= 4) {
+ if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+ OffReg = OffReg.getOperand(0);
+ else {
ShAmt = 0;
ShOpcVal = ARM_AM::no_shift;
- } else
- OffReg = OffReg.getOperand(0);
+ }
} else {
ShOpcVal = ARM_AM::no_shift;
}
@@ -1045,52 +1432,43 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
-/// OctoDRegs - Form 8 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
- SDValue V2, SDValue V3,
- SDValue V4, SDValue V5,
- SDValue V6, SDValue V7) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
- SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
- SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
- SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
- SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
- const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
- V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+ bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ if (Alignment >= 32 && NumRegs == 4)
+ Alignment = 32;
+ else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ Alignment = 16;
+ else if (Alignment >= 8)
+ Alignment = 8;
+ else
+ Alignment = 0;
+
+ return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
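
As a self-contained illustration of the clamping performed by the new GetVLDSTAlign helper above (the thresholds are taken directly from the hunk; the free-standing function name and its use outside the SelectionDAG context are hypothetical):

    // Clamp a requested alignment (in bytes) to a value the NEON VLD/VST
    // encoding can actually express for this many registers.
    static unsigned clampVLDSTAlign(unsigned Alignment, unsigned NumVecs,
                                    bool is64BitVector) {
      unsigned NumRegs = NumVecs;
      if (!is64BitVector && NumVecs < 3)
        NumRegs *= 2;                 // each Q register occupies two D registers
      if (Alignment >= 32 && NumRegs == 4)
        return 32;
      if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
        return 16;
      if (Alignment >= 8)
        return 8;
      return 0;                       // no usable alignment hint
    }

So a 16-byte-aligned VLD2 of Q registers (NumRegs == 4) keeps a 16-byte hint, while the same requested alignment on a single D register collapses to 8.
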
-/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
-/// for a 64-bit subregister of the vector.
-static EVT GetNEONSubregVT(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled NEON type");
- case MVT::v16i8: return MVT::v8i8;
- case MVT::v8i16: return MVT::v4i16;
- case MVT::v4f32: return MVT::v2f32;
- case MVT::v4i32: return MVT::v2i32;
- case MVT::v2i64: return MVT::v1i64;
- }
-}
-
-SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1120,88 +1498,97 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
ResTyElts *= 2;
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
}
+ std::vector<EVT> ResTys;
+ ResTys.push_back(ResTy);
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SDValue SuperReg;
- if (is64BitVector) {
- unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
- if (NumVecs == 1)
- return VLd;
-
- SuperReg = SDValue(VLd, 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, SuperReg);
- ReplaceUses(SDValue(N, Vec), D);
- }
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
- return NULL;
- }
-
- if (NumVecs <= 2) {
- // Quad registers are directly supported for VLD1 and VLD2,
- // loading pairs of D regs.
- unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
- if (NumVecs == 1)
- return VLd;
+ SDNode *VLd;
+ SmallVector<SDValue, 7> Ops;
- SuperReg = SDValue(VLd, 0);
- Chain = SDValue(VLd, 1);
+ // Double registers and VLD1/VLD2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
EVT AddrTy = MemAddr.getValueType();
- // Load the even subregs.
- unsigned Opc = QOpcodes0[OpcodeIndex];
+ // Load the even subregs. This is always an updating load, so that it
+ // provides the address to the second load for the odd subregs.
SDValue ImplDef =
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
- SDNode *VLdA =
- CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ ResTy, AddrTy, MVT::Other, OpsA, 7);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
- Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
- Pred, Reg0, Chain };
- SDNode *VLdB =
- CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
- SuperReg = SDValue(VLdB, 0);
- Chain = SDValue(VLdB, 2);
- }
-
- // Extract out the Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, SuperReg);
- ReplaceUses(SDValue(N, Vec), Q);
- }
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ Ops.push_back(SDValue(VLdA, 1));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VLD3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(SDValue(VLdA, 0));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ }
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // Extract out the subregisters.
+ SDValue SuperReg = SDValue(VLd, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
return NULL;
}
-SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
- EVT VT = N->getOperand(3).getValueType();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
+ Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1222,119 +1609,128 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
break;
}
+ std::vector<EVT> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
-
SmallVector<SDValue, 7> Ops;
- Ops.push_back(MemAddr);
- Ops.push_back(Align);
- if (is64BitVector) {
+ // Double registers and VST1/VST2 quad registers are directly supported.
+ if (is64BitVector || NumVecs <= 2) {
+ SDValue SrcReg;
if (NumVecs == 1) {
- Ops.push_back(N->getOperand(3));
- } else {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
-
+ SrcReg = N->getOperand(Vec0Idx);
+ } else if (is64BitVector) {
// Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else {
- SDValue V2 = N->getOperand(2+3);
- // If it's a vld3, form a quad D-register and leave the last part as
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ // If it's a vst3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ : N->getOperand(Vec0Idx + 3);
+ SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- Ops.push_back(RegSeq);
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
- }
-
- if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2.
- unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 1) {
- Ops.push_back(N->getOperand(3));
} else {
// Form a QQ register.
- SDValue Q0 = N->getOperand(3);
- SDValue Q1 = N->getOperand(4);
- Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
+ SDValue Q0 = N->getOperand(Vec0Idx);
+ SDValue Q1 = N->getOperand(Vec0Idx + 1);
+ SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+ }
+
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex]);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
+ Ops.push_back(SrcReg);
Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Reg0);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- SDValue V2 = N->getOperand(2+3);
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
- : N->getOperand(3+3);
+ : N->getOperand(Vec0Idx + 3);
SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- // Store the even D registers.
- Ops.push_back(Reg0); // post-access address offset
- Ops.push_back(RegSeq);
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), 7);
+ // Store the even D registers. This is always an updating store, so that it
+ // provides the address to the second store for the odd subregs.
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
+ SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+ MemAddr.getValueType(),
+ MVT::Other, OpsA, 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- Ops[6] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), 7);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ Ops.push_back(SDValue(VStA, 0));
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ assert(isa<ConstantSDNode>(Inc.getNode()) &&
+ "only constant post-increment update allowed for VST3/4");
+ (void)Inc;
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(RegSeq);
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
- unsigned NumVecs, unsigned *DOpcodes,
- unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ bool isUpdating, unsigned NumVecs,
+ unsigned *DOpcodes,
+ unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
unsigned Lane =
- cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue();
- EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
- // Quad registers are handled by load/store of subregs. Find the subreg info.
- unsigned NumElts = 0;
- bool Even = false;
- EVT RegVT = VT;
- if (!is64BitVector) {
- RegVT = GetNEONSubregVT(VT);
- NumElts = RegVT.getVectorNumElements();
- Even = Lane < NumElts;
- }
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1350,124 +1746,144 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
case MVT::v4i32: OpcodeIndex = 1; break;
}
+ std::vector<EVT> ResTys;
+ if (IsLoad) {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
+ MVT::i64, ResTyElts));
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 8> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ }
- unsigned Opc = 0;
- if (is64BitVector) {
- Opc = DOpcodes[OpcodeIndex];
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(Vec0Idx + 0);
+ SDValue V1 = N->getOperand(Vec0Idx + 1);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Check if this is loading the even or odd subreg of a Q register.
- if (Lane < NumElts) {
- Opc = QOpcodes0[OpcodeIndex];
- } else {
- Lane -= NumElts;
- Opc = QOpcodes1[OpcodeIndex];
- }
-
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue V2 = N->getOperand(Vec0Idx + 2);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(Vec0Idx + 3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+ Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
+ Ops.data(), Ops.size());
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
+ return VLdLn;
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+ ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
- } else {
- // For 128-bit vectors, take the 64-bit results of the load and insert
- // them as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
+SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs, unsigned *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue MemAddr, Align;
+ if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Alignment = 0;
+ if (NumVecs != 3) {
+ Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ if (Alignment < 8 && Alignment < NumBytes)
+ Alignment = 0;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment == 1)
+ Alignment = 0;
+ }
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vld-dup type");
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ }
+
+ SDValue Pred = getAL(CurDAG);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
+ unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ std::vector<EVT> ResTys;
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+ if (isUpdating)
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::Other);
+ SDNode *VLdDup =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SuperReg = SDValue(VLdDup, 0);
+
+ // Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ unsigned SubIdx = ARM::dsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
return NULL;
}
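
The per-lane path (SelectVLDSTLane) and the vld-dup path (SelectVLDDup) above both reduce the alignment operand with the same sequence. A minimal standalone sketch of that computation, with a hypothetical helper name and the element size passed in bytes:

    static unsigned clampLaneAlign(unsigned Alignment, unsigned NumVecs,
                                   unsigned EltBytes) {
      unsigned NumBytes = NumVecs * EltBytes;   // bytes touched at one lane
      if (Alignment > NumBytes)
        Alignment = NumBytes;                   // never claim more than is accessed
      if (Alignment < 8 && Alignment < NumBytes)
        Alignment = 0;                          // too weak a hint to encode
      Alignment = (Alignment & -Alignment);     // keep only the lowest set bit,
                                                // so the result is a power of two
      if (Alignment == 1)
        Alignment = 0;
      return Alignment;
    }

For a vld2 lane of 32-bit elements (NumBytes == 8), a requested alignment of 16 is clamped to 8, while a requested 4 is dropped to 0.
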
@@ -1486,7 +1902,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
+ // If it's a vtbl3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
@@ -1494,17 +1910,10 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- // Now extract the D registers back out.
SmallVector<SDValue, 6> Ops;
if (IsExt)
Ops.push_back(N->getOperand(1));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
-
+ Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
@@ -1574,7 +1983,7 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
SDValue CPTmp0;
SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) {
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
unsigned Opc = 0;
@@ -1602,7 +2011,7 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
- if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
@@ -1611,36 +2020,66 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
}
SDNode *ARMDAGToDAGISel::
-SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
return 0;
- if (Pred_t2_so_imm(TrueVal.getNode())) {
- SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ Opc = ARM::t2MOVCCi;
+ } else if (TrueImm <= 0xffff) {
+ Opc = ARM::t2MOVCCi16;
+ } else if (is_t2_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::t2MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
+ // Large immediate.
+ Opc = ARM::t2MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N,
- ARM::t2MOVCCi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
+
return 0;
}
SDNode *ARMDAGToDAGISel::
-SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
return 0;
- if (Pred_so_imm(TrueVal.getNode())) {
- SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ unsigned Opc = 0;
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = is_so_imm(TrueImm);
+ if (isSoImm) {
+ Opc = ARM::MOVCCi;
+ } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
+ Opc = ARM::MOVCCi16;
+ } else if (is_so_imm_not(TrueImm)) {
+ TrueImm = ~TrueImm;
+ Opc = ARM::MVNCCi;
+ } else if (TrueVal.getNode()->hasOneUse() &&
+ (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
+ // Large immediate.
+ Opc = ARM::MOVCCi32imm;
+ }
+
+ if (Opc) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N,
- ARM::MOVCCi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
+
return 0;
}
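
In rough terms, the widened immediate handling in SelectT2CMOVImmOp and SelectARMCMOVImmOp above picks an opcode in this order (a summary of the hunk, using the opcode names that appear in it; constraints in parentheses are the guards from the code):

    is_so_imm / is_t2_so_imm       -> MOVCCi      / t2MOVCCi
    fits in 16 bits                -> MOVCCi16    / t2MOVCCi16    (ARM form needs v6T2)
    complement is a so_imm         -> MVNCCi      / t2MVNCCi      (immediate inverted first)
    single-use large immediate     -> MOVCCi32imm / t2MOVCCi32imm (pseudo, expanded later;
                                                                   needs v6T2 or a two-part SOImm)
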
@@ -1688,18 +2127,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
// Pattern complexity = 10 cost = 1 size = 0
if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal,
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal,
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
} else {
- SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal,
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal,
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
@@ -1742,13 +2181,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() || N->getNumOperands() != 2)
llvm_unreachable("unexpected CONCAT_VECTORS");
- DebugLoc dl = N->getDebugLoc();
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+ return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
@@ -1788,19 +2221,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
- ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
+ ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
Ops, 4);
} else {
SDValue Ops[] = {
CPIdx,
- CurDAG->getRegister(0, MVT::i32),
CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 6);
+ Ops, 5);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
@@ -1930,7 +2362,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::UMULL : ARM::UMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
}
}
case ISD::SMUL_LOHI: {
@@ -1944,7 +2378,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+ ARM::SMULL : ARM::SMULLv5,
+ dl, MVT::i32, MVT::i32, Ops, 5);
}
}
case ISD::LOAD: {
@@ -1987,7 +2423,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- MVT::Flag, Ops, 5);
+ MVT::Glue, Ops, 5);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
@@ -2088,12 +2524,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
EVT VecVT = N->getValueType(0);
EVT EltVT = VecVT.getVectorElementType();
unsigned NumElts = VecVT.getVectorNumElements();
- if (EltVT.getSimpleVT() == MVT::f64) {
+ if (EltVT == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
}
- assert(EltVT.getSimpleVT() == MVT::f32 &&
- "unexpected type for BUILD_VECTOR");
+ assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
if (NumElts == 2)
return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
@@ -2101,6 +2536,170 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
N->getOperand(2), N->getOperand(3));
}
+ case ARMISD::VLD2DUP: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
+ ARM::VLD2DUPd32Pseudo };
+ return SelectVLDDup(N, false, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, false, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo };
+ return SelectVLDDup(N, false, 4, Opcodes);
+ }
+
+ case ARMISD::VLD2DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
+ ARM::VLD2DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 2, Opcodes);
+ }
+
+ case ARMISD::VLD3DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
+ ARM::VLD3DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 3, Opcodes);
+ }
+
+ case ARMISD::VLD4DUP_UPD: {
+ unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
+ ARM::VLD4DUPd32Pseudo_UPD };
+ return SelectVLDDup(N, true, 4, Opcodes);
+ }
+
+ case ARMISD::VLD1_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
+ ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
+ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD2_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
+ ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
+ ARM::VLD2q32Pseudo_UPD };
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VLD3_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
+ return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD4_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
+ return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VLD2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
+ ARM::VLD2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
+ ARM::VLD2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
+ ARM::VLD3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
+ ARM::VLD3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VLD4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
+ ARM::VLD4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
+ ARM::VLD4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST1_UPD: {
+ unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
+ ARM::VST1d32_UPD, ARM::VST1d64_UPD };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
+ ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST2_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
+ ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
+ ARM::VST2q32Pseudo_UPD };
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ }
+
+ case ARMISD::VST3_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
+ return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST4_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
+ return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ }
+
+ case ARMISD::VST2LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
+ ARM::VST2LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
+ ARM::VST2LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST3LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
+ ARM::VST3LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
+ ARM::VST3LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
+ }
+
+ case ARMISD::VST4LN_UPD: {
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
+ ARM::VST4LNd32Pseudo_UPD };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
+ ARM::VST4LNq32Pseudo_UPD };
+ return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
+ }
+
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -2113,7 +2712,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1d32, ARM::VLD1d64 };
unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
- return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
@@ -2121,7 +2720,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
- return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
@@ -2130,10 +2729,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
- ARM::VLD3q16oddPseudo_UPD,
- ARM::VLD3q32oddPseudo_UPD };
- return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
+ ARM::VLD3q16oddPseudo,
+ ARM::VLD3q32oddPseudo };
+ return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
@@ -2142,31 +2741,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
- ARM::VLD4q16oddPseudo_UPD,
- ARM::VLD4q32oddPseudo_UPD };
- return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
+ ARM::VLD4q16oddPseudo,
+ ARM::VLD4q32oddPseudo };
+ return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
- return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
- return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
- return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
@@ -2174,7 +2773,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1d32, ARM::VST1d64 };
unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
- return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
@@ -2182,7 +2781,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
- return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
@@ -2191,10 +2790,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
- ARM::VST3q16oddPseudo_UPD,
- ARM::VST3q32oddPseudo_UPD };
- return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
+ ARM::VST3q16oddPseudo,
+ ARM::VST3q32oddPseudo };
+ return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
@@ -2203,31 +2802,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
- ARM::VST4q16oddPseudo_UPD,
- ARM::VST4q32oddPseudo_UPD };
- return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
+ ARM::VST4q16oddPseudo,
+ ARM::VST4q32oddPseudo };
+ return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
- unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
- return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
- unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
- return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
- unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
- return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
}
}
break;
@@ -2240,18 +2839,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2);
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
case Intrinsic::arm_neon_vtbl3:
- return SelectVTBL(N, false, 3, ARM::VTBL3);
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
- return SelectVTBL(N, false, 4, ARM::VTBL4);
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2);
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
case Intrinsic::arm_neon_vtbx3:
- return SelectVTBL(N, true, 3, ARM::VTBX3);
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
- return SelectVTBL(N, true, 4, ARM::VTBX4);
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
}
break;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ce4a2c9..1835ec0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
@@ -28,9 +29,11 @@
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,6 +44,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,6 +54,7 @@
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
// This option should go away when tail calls fully work.
static cl::opt<bool>
@@ -57,14 +62,7 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
cl::init(false));
-// This option should go away when Machine LICM is smart enough to hoist a
-// reg-to-reg VDUP.
-static cl::opt<bool>
-EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
- cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
- cl::init(false));
-
-static cl::opt<bool>
+cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
cl::desc("Generate calls via indirect call instructions"),
cl::init(false));
@@ -74,28 +72,6 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-static cl::opt<bool>
-EnableARMCodePlacement("arm-code-placement", cl::Hidden,
- cl::desc("Enable code placement pass for ARM"),
- cl::init(false));
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
@@ -111,8 +87,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
- if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
if (ElemTy != MVT::i32) {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
@@ -122,7 +97,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
@@ -131,6 +106,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction(VT.getSimpleVT(),
+ (MVT::SimpleValueType)InnerVT, Expand);
}
setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
@@ -177,6 +156,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -260,13 +240,157 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- // Libcalls should use the AAPCS base standard ABI, even if hard float
- // is in effect, as per the ARM RTABI specification, section 4.1.2.
if (Subtarget->isAAPCS_ABI()) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
- CallingConv::ARM_AAPCS);
- }
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
}
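// [Editorial note; illustration only, not part of the upstream change.]
// With the RTABI helper names registered above, runtime calls use the AEABI
// entry points under the ARM_AAPCS calling convention instead of the default
// compiler-rt names; for example a 64-bit signed division
//   long long quot(long long a, long long b) { return a / b; }
// is expected to lower to a call to __aeabi_ldivmod rather than __divdi3.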
if (Subtarget->isThumb1Only())
@@ -330,9 +454,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ // Custom handling for some vector types to avoid expensive expansions
+ setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+ setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
@@ -341,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::STORE);
}
computeRegisterProperties();
@@ -397,7 +532,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
// These are expanded into libcalls.
- if (!Subtarget->hasDivide()) {
+ if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
// v7M has a hardware divider
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -423,14 +558,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- // FIXME: Shouldn't need this, since no register is used, but the legalizer
- // doesn't yet know how to not do that for SjLj.
- setExceptionSelectorRegister(ARM::R0);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -474,6 +610,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -484,7 +622,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
- setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
}
@@ -493,6 +631,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
}
setOperationAction(ISD::SETCC, MVT::i32, Expand);
@@ -547,8 +686,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
- if (Subtarget->hasV6T2Ops())
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
setTargetDAGCombine(ISD::OR);
+ if (Subtarget->hasNEON())
+ setTargetDAGCombine(ISD::AND);
setStackPointerRegisterToSaveRestore(ARM::SP);
@@ -557,16 +698,26 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
+ //// temporary - rewrite interface to use type
+ maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- if (EnableARMCodePlacement)
- benefitFromCodePlacementOpt = true;
+ benefitFromCodePlacementOpt = true;
}
+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of its super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
@@ -580,6 +731,12 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = ARM::DPRRegisterClass;
+ // When NEON is used for SP, only half of the register file is available
+ // because operations that define both SP and DP results will be constrained
+ // to the VFP2 class (D0-D15). We currently model this constraint prior to
+ // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP())
+ Cost = 2;
break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
@@ -602,6 +759,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN";
+ case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
@@ -612,7 +771,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
- case ARMISD::AND: return "ARMISD::AND";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
@@ -633,25 +791,33 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
- case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
- case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
-
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
- case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";
+ case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
+
+ case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
+ case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
+ case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
+ case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
@@ -693,6 +859,28 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
case ARMISD::BFI: return "ARMISD::BFI";
+ case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
+ case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
+ case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
+ case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
+ case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
+ case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
+ case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
+ case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
+ case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
+ case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
+ case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
+ case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
+ case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
+ case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
+ case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
+ case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
+ case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
+ case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
+ case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
}
}
@@ -735,6 +923,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
if (VT.isFloatingPoint() || VT.isVector())
return Sched::Latency;
}
@@ -746,25 +936,29 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// is not available.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- if (TID.mayLoad())
- return Sched::Latency;
- const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
- if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ if (TID.getNumDefs() == 0)
+ return Sched::RegPressure;
+ if (!Itins->isEmpty() &&
+ Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
return Sched::Latency;
+
return Sched::RegPressure;
}
+// FIXME: Move to RegInfo
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
switch (RC->getID()) {
default:
return 0;
case ARM::tGPRRegClassID:
- return RegInfo->hasFP(MF) ? 4 : 5;
+ return TFI->hasFP(MF) ? 4 : 5;
case ARM::GPRRegClassID: {
- unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
}
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
@@ -829,136 +1023,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
#include "ARMGenCallingConv.inc"
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- // Try to get the first register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else {
- // For the 2nd half of a v2f64, do not fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 4),
- LocVT, LocInfo));
- return true;
- }
-
- // Try to get the second register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(4, 4),
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
-
- unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
- if (Reg == 0) {
- // For the 2nd half of a v2f64, do not just fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 8),
- LocVT, LocInfo));
- return true;
- }
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- unsigned T = State.AllocateReg(LoRegList[i]);
- (void)T;
- assert(T == LoRegList[i] && "Could not allocate register");
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
- if (Reg == 0)
- return false; // we didn't handle it
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- return true; // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
- State);
-}
-
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
@@ -967,23 +1031,29 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::C:
case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use the VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
// Use target triple & subtarget features to do actual dispatch.
- if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
- else
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
}
}
@@ -1050,7 +1120,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
+ Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
break;
}
@@ -1073,7 +1143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
- NULL, 0, NULL, 0);
+ MachinePointerInfo(0), MachinePointerInfo(0));
}
/// LowerMemOpCallTo - Store the argument to the stack.
@@ -1086,11 +1156,11 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal()) {
+ if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- }
+
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
@@ -1198,7 +1268,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
@@ -1289,7 +1359,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
// Create a constant pool entry for the callee address
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
ARMPCLabelIndex,
ARMCP::CPValue, 0);
@@ -1298,13 +1368,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
// Create a constant pool entry for the callee address
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
Sym, ARMPCLabelIndex, 0);
// Get the address of the callee into a register
@@ -1312,7 +1382,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -1326,7 +1396,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
ARMPCLabelIndex,
ARMCP::CPValue, 4);
@@ -1334,13 +1404,19 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
- } else
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
@@ -1349,20 +1425,26 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
- } else
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
}
// FIXME: handle tail calls differently.
@@ -1391,7 +1473,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall)
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
@@ -1421,7 +1503,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -1490,32 +1572,15 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// LR. This means if we need to reload LR, it takes an extra instructions,
// which outweighs the value of the tail call; but here we don't know yet
// whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
+ // generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
- if (Subtarget->isThumb1Only())
- return false;
-
- // For the moment, we can only do this to functions defined in this
- // compilation, or to indirect calls. A Thumb B to an ARM function,
- // or vice versa, is not easily fixed up in the linker unlike BL.
- // (We could do this by loading the address of the callee into a register;
- // that is an extra instruction over the direct call and burns a register
- // as well, so is not likely to be a win.)
-
- // It might be safe to remove this restriction on non-Darwin.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
// registers are the 4 used for parameters. We don't currently do this
// case.
- if (isa<ExternalSymbolSDNode>(Callee))
- return false;
-
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- if (GV->isDeclaration() || GV->isWeakForLinker())
- return false;
- }
+ if (Subtarget->isThumb1Only())
+ return false;
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
@@ -1583,7 +1648,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
- return false;
+ return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
@@ -1643,7 +1708,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
@@ -1693,6 +1758,61 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return result;
}
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ unsigned NumCopies = 0;
+ SDNode* Copies[2];
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::CopyToReg) {
+ Copies[NumCopies++] = Use;
+ } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ // f64 returned in a pair of GPRs.
+ for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::CopyToReg)
+ return false;
+ Copies[UI.getUse().getResNo()] = *UI;
+ ++NumCopies;
+ }
+ } else if (Use->getOpcode() == ISD::BITCAST) {
+ // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0))
+ return false;
+ Use = *Use->use_begin();
+ if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ return false;
+ Copies[NumCopies++] = Use;
+ } else {
+ return false;
+ }
+
+ if (NumCopies != 1 && NumCopies != 2)
+ return false;
+
+ bool HasRet = false;
+ for (unsigned i = 0; i < NumCopies; ++i) {
+ SDNode *Copy = Copies[i];
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ SDNode *Use = *UI;
+ if (Use == Copies[0] || Use == Copies[1])
+ continue;
+ return false;
+ }
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+ }
+
+ return HasRet;
+}
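// [Editorial note; sketch only, not part of the upstream change.]
// isUsedByReturnOnly returns true only when every use of N's value, possibly
// through a BITCAST (f32 in a GPR) or VMOVRRD (f64 in a GPR pair), is a
// CopyToReg whose sole consumer is ARMISD::RET_FLAG, i.e. the value feeds the
// return directly, as in:
//   double f(double x) { return sin(x); }   // libcall result -> copies -> ret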
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -1732,7 +1852,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress,
PCAdj);
@@ -1740,7 +1860,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
if (RelocM == Reloc::Static)
return Result;
@@ -1757,14 +1877,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, "tlsgd", true);
+ ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Argument.getValue(1);
@@ -1802,16 +1922,16 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
if (GV->isDeclaration()) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
// Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
- ARMCP::CPValue, PCAdj, "gottpoff", true);
+ ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
Chain = Offset.getValue(1);
@@ -1819,15 +1939,15 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
} else {
// local exec model
- ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
}
@@ -1859,51 +1979,72 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
if (RelocM == Reloc::PIC_) {
bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
ARMConstantPoolValue *CPV =
- new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
+ new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0,
- false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
return Result;
+ }
+
+ // If we have T2 ops, we can materialize the address directly via movt/movw
+ // pair. This is always cheaper.
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
- // If we have T2 ops, we can materialize the address directly via movt/movw
- // pair. This is always cheaper.
- if (Subtarget->useMovt()) {
- return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT));
- } else {
- SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
- false, false, 0);
- }
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(),
+ false, false, 0);
}
}
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = 0;
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (Subtarget->useMovt()) {
+ ++NumMovwMovt;
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ if (RelocM == Reloc::Static)
+ return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+ unsigned Wrapper = (RelocM == Reloc::PIC_)
+ ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+ SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, 0);
+ return Result;
+ }
+
+ unsigned ARMPCLabelIndex = 0;
SDValue CPAddr;
- if (RelocM == Reloc::Static)
+ if (RelocM == Reloc::Static) {
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
- else {
- ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ } else {
+ ARMPCLabelIndex = AFI->createPICLabelUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
@@ -1912,7 +2053,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Result.getValue(1);
@@ -1922,8 +2063,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
}
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0,
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
false, false, 0);
return Result;
@@ -1935,7 +2075,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1945,13 +2085,21 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -1980,7 +2128,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -1994,7 +2142,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
if (RelocM == Reloc::PIC_) {
@@ -2009,21 +2157,55 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
- SDValue Op5 = Op.getOperand(5);
- unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // Some subtargets which have dmb and dsb instructions can handle barriers
- // directly. Some ARMv6 cpus can support them with the help of mcr
- // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
- // never get here.
- unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasDataBarrier())
- return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else {
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ if (!Subtarget->hasDataBarrier()) {
+ // Some ARMv6 cpus can support data barriers with an mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+ // here.
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
"Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+ return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
+
+ SDValue Op5 = Op.getOperand(5);
+ bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+ unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+ ARM_MB::MemBOpt DMBOpt;
+ if (isDeviceBarrier)
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+ else
+ DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+ return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(DMBOpt, MVT::i32));
+}
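// [Editorial note; sketch only, not part of the upstream change; assumes the
// five i1 flags (ll, ls, sl, ss, device) of the llvm.memory.barrier intrinsic
// in this LLVM version.]
//   call void @llvm.memory.barrier(i1 1, i1 1, i1 1, i1 1, i1 1)  ; -> DMB SY
//   call void @llvm.memory.barrier(i1 0, i1 0, i1 0, i1 1, i1 0)  ; -> DMB ISHST
// Device barriers select SY (or ST when only stores are ordered); all others
// use the inner-shareable ISH / ISHST forms.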
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ // ARM pre-v5TE and Thumb1 do not have preload instructions.
+ if (!(Subtarget->isThumb2() ||
+ (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+ if (!isRead &&
+ (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+ // ARMv7 with MP extension has PLDW.
+ return Op.getOperand(0);
+
+ if (Subtarget->isThumb())
+ // Invert the bits.
+ isRead = ~isRead & 1;
+ unsigned isData = Subtarget->isThumb() ? 0 : 1;
+
+ // Currently there is no intrinsic that matches pli.
+ return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+ DAG.getConstant(isData, MVT::i32));
}
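// [Editorial note; sketch only, not part of the upstream change.]
// Behaviour implemented above: a write prefetch such as __builtin_prefetch(p, 1)
// is turned into an ARMISD::PRELOAD (PLDW) node only on ARMv7 cores with the MP
// extension; on other cores, and for any prefetch on pre-v5TE or Thumb1-only
// targets, only the chain is preserved and the prefetch is dropped.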
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -2036,8 +2218,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
SDValue
@@ -2054,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2065,10 +2247,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
@@ -2119,7 +2301,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
@@ -2149,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
@@ -2160,7 +2342,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
break;
case CCValAssign::SExt:
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
@@ -2188,7 +2370,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -2202,7 +2384,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned NumGPRs = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
unsigned VARegSize = (4 - NumGPRs) * 4;
unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
unsigned ArgOffset = CCInfo.getNextStackOffset();
@@ -2214,7 +2396,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
AFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(VARegSaveSize,
ArgOffset + VARegSaveSize - VARegSize,
- true));
+ false));
SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
getPointerTy());
@@ -2226,12 +2408,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else
RC = ARM::GPRRegisterClass;
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
- 0, false, false, 0);
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -2320,7 +2502,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
ARMcc = DAG.getConstant(CondCode, MVT::i32);
- return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+ return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
}
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -2329,10 +2511,10 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
DebugLoc dl) const {
SDValue Cmp;
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
- return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -2444,8 +2626,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
- Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
@@ -2464,7 +2645,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
SDValue Ptr = Ld->getBasePtr();
RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), Ptr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
@@ -2474,7 +2655,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
PtrType, Ptr, DAG.getConstant(4, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), NewPtr,
- Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+ Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
NewAlign);
return;
@@ -2524,7 +2705,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
expandf64Toi32(RHS, DAG, RHS1, RHS2);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
}
@@ -2564,7 +2745,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
if (CondCode2 != ARMCC::AL) {
@@ -2599,14 +2780,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0,
+ MachinePointerInfo::getJumpTable(),
false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, false, false, 0);
+ MachinePointerInfo::getJumpTable(), false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
@@ -2627,7 +2808,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
break;
}
Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -2646,7 +2827,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
break;
}
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
return DAG.getNode(Opc, dl, VT, Op);
}
@@ -2657,12 +2838,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
- SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
+ bool F2IisFast = Subtarget->isCortexA9() ||
+ Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+
+ // Bitcast operand 1 to i32.
+ if (SrcVT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp1, 1).getValue(1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
+
+ // If float to int conversion isn't going to be super expensive, then simply
+ // or in the signbit.
+ if (F2IisFast) {
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
+ }
+
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ }
+
+ // Remove the signbit of operand 0.
+ Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+
+ // If operand 1 signbit is one, then negate operand 0.
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
+ ISD::SETLT, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2678,11 +2893,11 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -2697,17 +2912,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
-/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
+/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
-static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
DebugLoc dl = N->getDebugLoc();
SDValue Op = N->getOperand(0);
@@ -2717,7 +2933,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
- "ExpandBIT_CONVERT called for non-i64 type");
+ "ExpandBITCAST called for non-i64 type");
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
@@ -2725,7 +2941,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+ return DAG.getNode(ISD::BITCAST, dl, DstVT,
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
@@ -2752,7 +2968,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -2825,7 +3041,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPSCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
@@ -2835,11 +3051,11 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
DAG.getConstant(Intrinsic::arm_get_fpscr,
MVT::i32));
- SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, MVT::i32));
- return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, MVT::i32));
}
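// For illustration, the computation above is
//   FLT_ROUNDS = ((FPSCR + (1 << 22)) >> 22) & 3:
// adding 1 << 22 increments the two-bit RMode field in bits 23:22, so e.g.
// RMode = 2 (round toward minus infinity) becomes 3, matching the
// 0->1, 1->2, 2->3, 3->0 rotation described above.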
@@ -2860,33 +3076,40 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
+ if (!VT.isVector())
+ return SDValue();
+
// Lower vector shifts on NEON to use VSHL.
- if (VT.isVector()) {
- assert(ST->hasNEON() && "unexpected vector shift");
-
- // Left shifts translate directly to the vshiftu intrinsic.
- if (N->getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
- N->getOperand(0), N->getOperand(1));
-
- assert((N->getOpcode() == ISD::SRA ||
- N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
-
- // NEON uses the same intrinsics for both left and right shifts. For
- // right shifts, the shift amounts are negative, so negate the vector of
- // shift amounts.
- EVT ShiftVT = N->getOperand(1).getValueType();
- SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
- getZeroVector(ShiftVT, DAG, dl),
- N->getOperand(1));
- Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
- Intrinsic::arm_neon_vshifts :
- Intrinsic::arm_neon_vshiftu);
+ assert(ST->hasNEON() && "unexpected vector shift");
+
+ // Left shifts translate directly to the vshiftu intrinsic.
+ if (N->getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(vshiftInt, MVT::i32),
- N->getOperand(0), NegatedCount);
- }
+ DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+ N->getOperand(0), N->getOperand(1));
+
+ assert((N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+ // NEON uses the same intrinsics for both left and right shifts. For
+ // right shifts, the shift amounts are negative, so negate the vector of
+ // shift amounts.
+ EVT ShiftVT = N->getOperand(1).getValueType();
+ SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+ getZeroVector(ShiftVT, DAG, dl),
+ N->getOperand(1));
+ Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+ Intrinsic::arm_neon_vshifts :
+ Intrinsic::arm_neon_vshiftu);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(vshiftInt, MVT::i32),
+ N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// We can get here for a node like i32 = ISD::SHL i32, i64
if (VT != MVT::i64)
@@ -2912,7 +3135,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
@@ -2998,13 +3221,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
AndOp = Op1;
// Ignore bitconvert.
- if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
+ if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
AndOp = AndOp.getOperand(0);
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
- Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
- Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
+ Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+ Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
Invert = !Invert;
}
}
@@ -3013,7 +3236,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (Swap)
std::swap(Op0, Op1);
- SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
@@ -3026,7 +3280,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- EVT &VT, bool is128Bits, bool isVMOV) {
+ EVT &VT, bool is128Bits, NEONModImmType type) {
unsigned OpCmode, Imm;
// SplatBitSize is set to the smallest size that splats the vector, so a
@@ -3039,7 +3293,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
switch (SplatBitSize) {
case 8:
- if (!isVMOV)
+ if (type != VMOVModImm)
return SDValue();
// Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
@@ -3096,6 +3350,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
break;
}
+ // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+ if (type == OtherModImm) return SDValue();
+
if ((SplatBits & ~0xffff) == 0 &&
((SplatBits | SplatUndef) & 0xff) == 0xff) {
// Value = 0x0000nnff: Op=x, Cmode=1100.
@@ -3122,7 +3379,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
return SDValue();
case 64: {
- if (!isVMOV)
+ if (type != VMOVModImm)
return SDValue();
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
uint64_t BitMask = 0xff;
@@ -3376,8 +3633,8 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -3391,10 +3648,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VmovVT, VT.is128BitVector(), true);
+ DAG, VmovVT, VT.is128BitVector(),
+ VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
@@ -3402,10 +3660,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
((1LL << SplatBitSize) - 1));
Val = isNEONModifiedImm(NegatedImm,
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VmovVT, VT.is128BitVector(), false);
+ DAG, VmovVT, VT.is128BitVector(),
+ VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
}
}
@@ -3439,26 +3698,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats. For f32 constant splats, reduce to
- // i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- if (VT.getVectorElementType().isFloatingPoint()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(i)));
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
- NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerBUILD_VECTOR(Val, DAG, ST));
- }
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
// If all elements are constants and the case above didn't get hit, fall back
@@ -3467,10 +3725,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isConstant)
return SDValue();
- if (!EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats.
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
}
// Vectors with 32- or 64-bit elements can be built by directly assigning
@@ -3483,14 +3742,144 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
return SDValue();
}
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
+ }
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently we only do something sane when at most two source vectors
+ // are involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = {0, 0};
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // It probably isn't worth padding out a smaller vector just to
+ // break it down again in a shuffle.
+ return SDValue();
+ }
+
+ // Since only 64-bit and 128-bit vectors are legal on ARM and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // Span too large for a VEXT to cope with
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(VEXTOffsets[i], MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+ .getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ }
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
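+// As a rough example of the above: building a v4i16 from elements 2..5 of a
+// single v8i16 source records MinElts = 2 and MaxElts = 5, so the code emits
+// a VEXT of the two halves with offset 2 and the final shuffle mask becomes
+// the identity mask <0, 1, 2, 3>.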
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -3706,8 +4095,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
@@ -3719,21 +4108,26 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
MVT::i32)));
}
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
- SDValue Vec = Op.getOperand(0);
+ // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
- assert(VT == MVT::i32 &&
- Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
- "unexpected type for custom-lowering vector extract");
- return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ SDValue Vec = Op.getOperand(0);
+ if (Op.getValueType() == MVT::i32 &&
+ Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+ }
+
+ return Op;
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
@@ -3747,25 +4141,123 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue Op1 = Op.getOperand(1);
if (Op0.getOpcode() != ISD::UNDEF)
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
DAG.getIntPtrConstant(0));
if (Op1.getOpcode() != ISD::UNDEF)
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
DAG.getIntPtrConstant(1));
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+ bool isSigned) {
+ // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ if (BVN->getValueType(0) != MVT::v4i32 ||
+ BVN->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ unsigned HiElt = 1 - LoElt;
+ ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+ ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+ ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+ ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+ if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+ return false;
+ if (isSigned) {
+ if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+ Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+ return true;
+ } else {
+ if (Hi0->isNullValue() && Hi1->isNullValue())
+ return true;
+ }
+ return false;
+ }
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (isSigned) {
+ int64_t SExtVal = C->getSExtValue();
+ if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ return false;
+ } else {
+ if ((C->getZExtValue() >> HalfSize) != 0)
+ return false;
+ }
+ continue;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, true))
+ return true;
+ return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+ return true;
+ if (isExtendedBUILD_VECTOR(N, DAG, false))
+ return true;
+ return false;
}
-/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
-/// an extending load, return the unextended value.
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
return N->getOperand(0);
- LoadSDNode *LD = cast<LoadSDNode>(N);
- return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
- LD->getBasePtr(), LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+ // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
+ // have been legalized as a BITCAST from v4i32.
+ if (N->getOpcode() == ISD::BITCAST) {
+ SDNode *BVN = N->getOperand(0).getNode();
+ assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+ BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+ unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+ BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+ }
+ // Construct a new BUILD_VECTOR with elements truncated to half the size.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+ EVT VT = N->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT TruncVT = MVT::getIntegerVT(EltSize);
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+ const APInt &CInt = C->getAPIntValue();
+ Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
@@ -3776,19 +4268,16 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
- if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
- (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
NewOpc = ARMISD::VMULLs;
- } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
- (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
NewOpc = ARMISD::VMULLu;
- } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ else if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
- } else {
+ else
// Other vector multiplications are legal.
return Op;
- }
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
@@ -3801,6 +4290,181 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+ // Convert to float
+ // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+ // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+ X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+ Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+ // Get reciprocal estimate.
+ // float4 recip = vrecpeq_f32(yf);
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+ // Because char has a smaller range than uchar, we can actually get away
+ // without any Newton steps. This requires that we use a weird bias
+ // of 0xb000, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+ X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+ Y = DAG.getConstant(0xb000, MVT::i32);
+ Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+ X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+ X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+ // Convert back to short.
+ X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+ X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+ return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+ SDValue N2;
+ // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_s16(x));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and one refinement step.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Because short has a smaller range than ushort, we can actually get away
+ // with only a single Newton step. This requires that we use a weird bias
+ // of 89, however (again, this has been exhaustively tested).
+ // float4 result = as_float4(as_int4(xf*recip) + 89);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(89, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_s32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
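+// Note on the refinement above (a sketch of the standard technique): vrecps
+// computes (2 - y*r) for an estimate r of 1/y, so r * (2 - y*r) is one
+// Newton-Raphson step, roughly doubling the number of correct bits in the
+// reciprocal estimate before it is multiplied by x.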
+
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::SDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
+ return N0;
+ }
+ return LowerSDIV_v4i16(N0, N1, dl, DAG);
+}
+
+static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
+ "unexpected type for custom-lowering ISD::UDIV");
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2, N3;
+
+ if (VT == MVT::v8i8) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
+
+ N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(4));
+ N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(4));
+ N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
+ DAG.getIntPtrConstant(0));
+ N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
+ DAG.getIntPtrConstant(0));
+
+ N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+ N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+ N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+ DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+ N0);
+ return N0;
+ }
+
+ // v4i16 udiv ... Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+ // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+ N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+ N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+ // Use reciprocal estimate and two refinement steps.
+ // float4 recip = vrecpeq_f32(yf);
+ // recip *= vrecpsq_f32(yf, recip);
+ // recip *= vrecpsq_f32(yf, recip);
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+ N1, N2);
+ N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+ // Simply multiplying by the reciprocal estimate can leave us a few ulps
+ // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+ // and that it will never cause us to return an answer too large).
+ // float4 result = as_float4(as_int4(xf*recip) + 2);
+ N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+ N1 = DAG.getConstant(2, MVT::i32);
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+ N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+ N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+ // Convert back to integer and return.
+ // return vmovn_u32(vcvt_s32_f32(result));
+ N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+ N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+ return N0;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3816,6 +4480,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
@@ -3826,9 +4491,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
- case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
@@ -3843,6 +4509,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::UDIV: return LowerUDIV(Op, DAG);
}
return SDValue();
}
@@ -3857,12 +4525,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
default:
llvm_unreachable("Don't know how to custom expand this!");
break;
- case ISD::BIT_CONVERT:
- Res = ExpandBIT_CONVERT(N, DAG);
+ case ISD::BITCAST:
+ Res = ExpandBITCAST(N, DAG);
break;
case ISD::SRL:
case ISD::SRA:
- Res = LowerShift(N, DAG, Subtarget);
+ Res = Expand64BitShift(N, DAG, Subtarget);
break;
}
if (Res.getNode())
@@ -3892,7 +4560,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
@@ -4183,6 +4851,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::BCCi64:
case ARM::BCCZi64: {
+ // If there is an unconditional branch to the other successor, remove it.
+ BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
+
// Compare both parts that make up the double comparison separately for
// equality.
bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
@@ -4341,10 +5012,6 @@ static SDValue PerformMULCombine(SDNode *N,
if (Subtarget->isThumb1Only())
return SDValue();
- if (DAG.getMachineFunction().
- getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- return SDValue();
-
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -4389,10 +5056,67 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Attempt to use immediate-form VBIC
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VbicVT;
+ SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VbicVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
+ }
+
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
@@ -4400,7 +5124,6 @@ static SDValue PerformORCombine(SDNode *N,
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// 1) or (and A, mask), val => ARMbfi A, val, mask
@@ -4415,40 +5138,46 @@ static SDValue PerformORCombine(SDNode *N,
if (N0.getOpcode() != ISD::AND)
return SDValue();
- EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
+ SDValue N00 = N0.getOperand(0);
// The value and the mask need to be constants so we can verify this is
// actually a bitfield set. If the mask is 0xffff, we can do better
// via a movt instruction, so don't use BFI in that case.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C)
+ SDValue MaskOp = N0.getOperand(1);
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+ if (!MaskC)
return SDValue();
- unsigned Mask = C->getZExtValue();
+ unsigned Mask = MaskC->getZExtValue();
if (Mask == 0xffff)
return SDValue();
SDValue Res;
// Case (1): or (and A, mask), val => ARMbfi A, val, mask
- if ((C = dyn_cast<ConstantSDNode>(N1))) {
- unsigned Val = C->getZExtValue();
- if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
return SDValue();
- Val >>= CountTrailingZeros_32(~Mask);
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
- DAG.getConstant(Val, MVT::i32),
- DAG.getConstant(Mask, MVT::i32));
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= CountTrailingZeros_32(~Mask);
- // Do not add new nodes to DAG combiner worklist.
- DCI.CombineTo(N, Res, false);
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+ }
} else if (N1.getOpcode() == ISD::AND) {
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
- C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
- if (!C)
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
return SDValue();
- unsigned Mask2 = C->getZExtValue();
+ unsigned Mask2 = N11C->getZExtValue();
if (ARM::isBitFieldInvertedMask(Mask) &&
ARM::isBitFieldInvertedMask(~Mask2) &&
@@ -4462,10 +5191,11 @@ static SDValue PerformORCombine(SDNode *N,
unsigned lsb = CountTrailingZeros_32(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(lsb, MVT::i32));
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
+ return SDValue();
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
ARM::isBitFieldInvertedMask(Mask2) &&
(CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
@@ -4476,40 +5206,472 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
// 2b
unsigned lsb = CountTrailingZeros_32(Mask);
- Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
DAG.getConstant(Mask2, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
+ return SDValue();
}
}
+ if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(Mask);
+ if (ShAmtC != LSB)
+ return SDValue();
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+ DAG.getConstant(~Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+
+ return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
+/// C1 & C2 == C1.
+static SDValue PerformBFICombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::AND) {
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return SDValue();
+ unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Mask2 = N11C->getZExtValue();
+ if ((Mask & Mask2) == Mask2)
+ return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N1.getOperand(0),
+ N->getOperand(2));
+ }
return SDValue();
}
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // fmrrd(fmdrr x, y) -> x,y
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
return SDValue();
}
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BITCAST)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BITCAST)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue StVal = St->getValue();
+ if (!ISD::isNormalStore(St) || St->isVolatile() ||
+ StVal.getValueType() != MVT::i64 ||
+ StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = StVal.getDebugLoc();
+ SDValue IntVec = StVal.getOperand(0);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ IntVec.getValueType().getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+ SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ Vec, StVal.getOperand(1));
+ dl = N->getDebugLoc();
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(ExtElt.getNode());
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment(),
+ St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads. If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+ unsigned NumElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDNode *Elt = N->getOperand(i).getNode();
+ if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+ return true;
+ }
+ return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI){
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ SelectionDAG &DAG = DCI.DAG;
+ if (N->getNumOperands() == 2) {
+ SDValue RV = PerformVMOVDRRCombine(N, DAG);
+ if (RV.getNode())
+ return RV;
+ }
+
+ // Load i64 elements as f64 values so that type legalization does not split
+ // them up into i32 values.
+ EVT VT = N->getValueType(0);
+ if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+ return SDValue();
+ DebugLoc dl = N->getDebugLoc();
+ SmallVector<SDValue, 8> Ops;
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+ Ops.push_back(V);
+ // Make the DAGCombiner fold the bitcast.
+ DCI.AddToWorklist(V.getNode());
+ }
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // When an i64 load is inserted into a vector, bitcast it to f64;
+ // otherwise, the i64 value will be legalized to a pair of i32 values.
+ EVT VT = N->getValueType(0);
+ SDNode *Elt = N->getOperand(1).getNode();
+ if (VT.getVectorElementType() != MVT::i64 ||
+ !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+ VT.getVectorNumElements());
+ SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+ SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+ // Make the DAGCombiner fold the bitcasts.
+ DCI.AddToWorklist(Vec.getNode());
+ DCI.AddToWorklist(V.getNode());
+ SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+ Vec, V, N->getOperand(2));
+ return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+ // The LLVM shufflevector instruction does not require the shuffle mask
+ // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+ // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
+ // operands do not match the mask length, they are extended by concatenating
+ // them with undef vectors. That is probably the right thing for other
+ // targets, but for NEON it is better to concatenate two double-register
+ // size vector operands into a single quad-register size vector. Do that
+ // transformation here:
+ // shuffle(concat(v1, undef), concat(v2, undef)) ->
+ // shuffle(concat(v1, v2), undef)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+ Op0.getNumOperands() != 2 ||
+ Op1.getNumOperands() != 2)
+ return SDValue();
+ SDValue Concat0Op1 = Op0.getOperand(1);
+ SDValue Concat1Op1 = Op1.getOperand(1);
+ if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+ Concat1Op1.getOpcode() != ISD::UNDEF)
+ return SDValue();
+ // Skip the transformation if any of the types are illegal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = N->getValueType(0);
+ if (!TLI.isTypeLegal(VT) ||
+ !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+ !TLI.isTypeLegal(Concat1Op1.getValueType()))
+ return SDValue();
+
+ SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ Op0.getOperand(0), Op1.getOperand(0));
+ // Translate the shuffle mask.
+ SmallVector<int, 16> NewMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfElts = NumElts/2;
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned n = 0; n < NumElts; ++n) {
+ int MaskElt = SVN->getMaskElt(n);
+ int NewElt = -1;
+ if (MaskElt < (int)HalfElts)
+ NewElt = MaskElt;
+ else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+ NewElt = HalfElts + MaskElt - NumElts;
+ NewMask.push_back(NewElt);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+ DAG.getUNDEF(VT), NewMask.data());
+}
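+// For example, with v4i16 inputs (so VT is v8i16 and HalfElts is 4), an
+// interleaving mask <0, 8, 1, 9, 2, 10, 3, 11> over the two concats is
+// rewritten as <0, 4, 1, 5, 2, 6, 3, 7> over the single concat(v1, v2),
+// with the second shuffle operand becoming undef.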
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: assert(0 && "unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: assert(0 && "unexpected opcode for Neon base update");
+ case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+ case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+ case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint64_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ } else if (NumBytes >= 3 * 16) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ continue;
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
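+// In assembly terms, the intended effect of this combine is to fold e.g.
+//   vld1.32 {d0}, [r0]   followed by   add r0, r0, #8
+// into the post-incrementing form
+//   vld1.32 {d0}, [r0]!
+// whenever the increment matches the size of the memory access.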
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // vldN-dup instructions only support 64-bit vectors for N > 1.
+ if (!VT.is64BitVector())
+ return false;
+
+ // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = ARMISD::VLD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = ARMISD::VLD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = ARMISD::VLD4DUP;
+ } else {
+ return false;
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ARMISD::VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return false;
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+ Ops, 2, VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return true;
+}
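
At the intrinsics level, the vldN-lane-to-vldN-dup pattern can arise from source along these lines (a sketch with hypothetical names; as checked above, every result of the lane load must feed a VDUPLANE with the same lane number for the combine to fire):

  #include <arm_neon.h>

  // Load lane 0 of two d registers, then broadcast that lane to all lanes.
  // If these dups are the only uses, the combine forms a single VLD2DUP.
  uint8x8x2_t splat_pair(const uint8_t *p, uint8x8x2_t prev) {
    uint8x8x2_t t = vld2_lane_u8(p, prev, 0);
    uint8x8x2_t r;
    r.val[0] = vdup_lane_u8(t.val[0], 0);
    r.val[1] = vdup_lane_u8(t.val[1], 0);
    return r;
  }
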
+
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
- // redundant.
SDValue Op = N->getOperand(0);
- EVT VT = N->getValueType(0);
- // Ignore bit_converts.
- while (Op.getOpcode() == ISD::BIT_CONVERT)
+ // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+ // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+ if (CombineVLDDUP(N, DCI))
+ return SDValue(N, 0);
+
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant. Ignore bit_converts for now; element sizes are checked below.
+ while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
return SDValue();
@@ -4521,11 +5683,11 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
unsigned EltBits;
if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
EltSize = 8;
+ EVT VT = N->getValueType(0);
if (EltSize > VT.getVectorElementType().getSizeInBits())
return SDValue();
- SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
- return DCI.CombineTo(N, Res, false);
+ return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -4533,7 +5695,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
- while (Op.getOpcode() == ISD::BIT_CONVERT)
+ while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
@@ -4747,7 +5909,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
// Nothing to be done for scalar shifts.
- if (! VT.isVector())
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
assert(ST->hasNEON() && "unexpected vector shift");
@@ -4793,7 +5956,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
if (VT == MVT::i32 &&
(EltVT == MVT::i8 || EltVT == MVT::i16) &&
- TLI.isTypeLegal(Vec.getValueType())) {
+ TLI.isTypeLegal(Vec.getValueType()) &&
+ isa<ConstantSDNode>(Lane)) {
unsigned Opc = 0;
switch (N->getOpcode()) {
@@ -4906,7 +6070,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI);
+ case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::STORE: return PerformSTORECombine(N, DCI);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -4916,20 +6087,42 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::VLD2DUP:
+ case ARMISD::VLD3DUP:
+ case ARMISD::VLD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default: break;
+ }
+ break;
}
return SDValue();
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- if (!Subtarget->hasV6Ops())
- // Pre-v6 does not support unaligned mem access.
- return false;
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- // FIXME: This is pretty conservative. Should we provide cmdline option to
- // control the behaviour?
- if (!Subtarget->isTargetDarwin())
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+ if (!Subtarget->allowsUnalignedMem())
return false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -5143,7 +6336,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(Imm) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
return Imm >= 0 && Imm <= 255;
}
@@ -5348,6 +6541,37 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
// ARM Inline Assembly Support
//===----------------------------------------------------------------------===//
+bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
+ // Looking for "rev" which is V6+.
+ if (!Subtarget->hasV6Ops())
+ return false;
+
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::string AsmStr = IA->getAsmString();
+ SmallVector<StringRef, 4> AsmPieces;
+ SplitString(AsmStr, AsmPieces, ";\n");
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t,");
+
+ // rev $0, $1
+ if (AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
+ IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (Ty && Ty->getBitWidth() == 32)
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ break;
+ }
+
+ return false;
+}
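
The single pattern accepted above corresponds to GCC-style inline asm such as the following sketch (swap32 is a made-up name); on V6+ it is replaced with a byte-swap intrinsic rather than emitted as a literal asm block:

  // "rev %0, %1" with constraints "=l"(r) : "l"(x) reaches the DAG as the
  // "rev $0, $1" / "=l,l" form matched above and lowers to llvm.bswap.i32.
  static unsigned swap32(unsigned x) {
    unsigned r;
    __asm__("rev %0, %1" : "=l"(r) : "l"(x));
    return r;
  }
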
+
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
@@ -5362,6 +6586,40 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'l':
+ if (type->isIntegerTy()) {
+ if (Subtarget->isThumb())
+ weight = CW_SpecificReg;
+ else
+ weight = CW_Register;
+ }
+ break;
+ case 'w':
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
@@ -5664,3 +6922,63 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return ARM::getVFPf64Imm(Imm) != -1;
return false;
}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
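
Worked example for the conservative memVT above: an arm_neon_vld4 returning four 128-bit vectors has an allocation size of 64 bytes, so NumElts = 64 / 8 = 8 and memVT becomes v8i64, i.e. the MachineMemOperand covers the entire 64-byte access rather than a single vector.
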
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index ba9ea7f..dc400c4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -34,6 +34,10 @@ namespace llvm {
Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
// TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in
+ // DYN mode.
+ WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
+ // PIC mode.
WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
CALL, // Function call.
@@ -47,8 +51,6 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
- AND, // ARM "and" instruction that sets the 's' flag in CPSR.
-
CMP, // ARM compare instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
@@ -73,8 +75,9 @@ namespace llvm {
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -82,13 +85,20 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
- MEMBARRIER, // Memory barrier
- SYNCBARRIER, // Memory sync barrier
+ MEMBARRIER, // Memory barrier (DMB)
+ MEMBARRIER_MCR, // Memory barrier (MCR)
+
+ PRELOAD, // Preload
VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
VCGEU, // Vector compare unsigned greater than or equal.
VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
@@ -161,7 +171,38 @@ namespace llvm {
FMIN,
// Bit-field insert
- BFI
+ BFI,
+
+ // Vector OR with immediate
+ VORRIMM,
+ // Vector AND with NOT of immediate
+ VBICIMM,
+
+ // Vector load N-element structure to all lanes:
+ VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD3DUP,
+ VLD4DUP,
+
+ // NEON loads with post-increment base updates:
+ VLD1_UPD,
+ VLD2_UPD,
+ VLD3_UPD,
+ VLD4_UPD,
+ VLD2LN_UPD,
+ VLD3LN_UPD,
+ VLD4LN_UPD,
+ VLD2DUP_UPD,
+ VLD3DUP_UPD,
+ VLD4DUP_UPD,
+
+ // NEON stores with post-increment base updates:
+ VST1_UPD,
+ VST2_UPD,
+ VST3_UPD,
+ VST4_UPD,
+ VST2LN_UPD,
+ VST3LN_UPD,
+ VST4LN_UPD
};
}
@@ -193,14 +234,16 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses of the specified type.
/// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
@@ -241,7 +284,15 @@ namespace llvm {
unsigned Depth) const;
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -290,6 +341,9 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(EVT VT) const;
@@ -301,6 +355,8 @@ namespace llvm {
const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -329,6 +385,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -350,6 +407,10 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
+
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -393,6 +454,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
@@ -410,6 +473,13 @@ namespace llvm {
};
+ enum NEONModImmType {
+ VMOVModImm,
+ VMVNModImm,
+ OtherModImm
+ };
+
+
namespace ARM {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 113cfff..765cba4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -1,4 +1,4 @@
-//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=//
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -71,7 +71,7 @@ def NVTBLFrm : Format<41>;
// Misc flags.
-// the instruction has a Rn register operand.
+// The instruction has an Rn register operand.
// UnaryDP - Indicates this is a unary data processing instruction, i.e.
 // it doesn't have an Rn operand.
class UnaryDP { bit isUnaryDataProc = 1; }
@@ -84,9 +84,10 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
// ARM Instruction flags. These need to match ARMBaseInstrInfo.h.
//
+// FIXME: Once the JIT is MC-ized, these can go away.
// Addressing mode.
-class AddrMode<bits<4> val> {
- bits<4> Value = val;
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
}
def AddrModeNone : AddrMode<0>;
def AddrMode1 : AddrMode<1>;
@@ -104,6 +105,7 @@ def AddrModeT2_i8 : AddrMode<12>;
def AddrModeT2_so : AddrMode<13>;
def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
// Instruction size.
class SizeFlagVal<bits<3> val> {
@@ -134,7 +136,6 @@ def NeonDomain : Domain<2>; // Instructions in Neon domain only
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
//===----------------------------------------------------------------------===//
-
// ARM special operands.
//
@@ -143,6 +144,39 @@ def CondCodeOperand : AsmOperandClass {
let SuperClasses = [];
}
+def CCOutOperand : AsmOperandClass {
+ let Name = "CCOut";
+ let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+ let Name = "MemBarrierOpt";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+ let Name = "ProcIFlags";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+ let Name = "MSRMask";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+ let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+ let PrintMethod = "printCPSIFlag";
+ let ParserMatchClass = ProcIFlagsOperand;
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
@@ -153,16 +187,23 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
}
// Same as cc_out except it defaults to setting CPSR.
def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ let EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
+ let ParserMatchClass = CCOutOperand;
}
// ARM special operands for disassembly only.
//
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+}
def cps_opt : Operand<i32> {
let PrintMethod = "printCPSOptionOperand";
@@ -170,6 +211,7 @@ def cps_opt : Operand<i32> {
def msr_mask : Operand<i32> {
let PrintMethod = "printMSRMaskOperand";
+ let ParserMatchClass = MSRMaskOperand;
}
// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
@@ -179,7 +221,6 @@ def neg_zero : Operand<i32> {
}
//===----------------------------------------------------------------------===//
-
// ARM Instruction templates.
//
@@ -198,14 +239,17 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
// The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
- let TSFlags{3-0} = AM.Value;
- let TSFlags{6-4} = SZ.Value;
- let TSFlags{8-7} = IndexModeBits;
- let TSFlags{14-9} = Form;
- let TSFlags{15} = isUnaryDataProc;
- let TSFlags{16} = canXformTo16Bit;
- let TSFlags{18-17} = D.Value;
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{7-5} = SZ.Value;
+ let TSFlags{9-8} = IndexModeBits;
+ let TSFlags{15-10} = Form;
+ let TSFlags{16} = isUnaryDataProc;
+ let TSFlags{17} = canXformTo16Bit;
+ let TSFlags{19-18} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
@@ -225,25 +269,51 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: InstTemplate<am, sz, im, f, d, cstr, itin>;
-class PseudoInst<dag oops, dag iops, InstrItinClass itin,
- string asm, list<dag> pattern>
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+ // FIXME: This really should derive from InstTemplate instead, as pseudos
+ // don't need encoding information. TableGen doesn't like that
+ // currently. Need to figure out why and fix it.
: InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
"", itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
let Pattern = pattern;
}
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let SZ = sz;
+ list<Predicate> Predicates = [IsThumb2];
+}
// Almost all ARM instructions are predicable.
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -270,9 +340,14 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -319,10 +394,6 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
asm, "", pattern> {
let Inst{27-24} = opcod;
}
-class ABXIx2<dag oops, dag iops, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
- asm, "", pattern>;
// BR_JT instructions
class JTI<dag oops, dag iops, InstrItinClass itin,
@@ -335,19 +406,42 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rt;
+ bits<4> Rn;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> Rn;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
let Inst{11-4} = 0b11111001;
+ let Inst{3-0} = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+ : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> Rn;
+ let Inst{27-23} = 0b00010;
+ let Inst{22} = b;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b00001001;
+ let Inst{3-0} = Rt2;
}
// addrmode1 instructions
@@ -372,387 +466,125 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24-21} = opcod;
let Inst{27-26} = 0b00;
}
-class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern>;
-
-
-// addrmode2 loads and stores
-class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{27-26} = 0b01;
-}
// loads
-class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-
-// stores
-class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-// Pre-indexed loads
-class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-
-// Pre-indexed stores
-class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin,
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = isByte;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
opc, asm, cstr, pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-26} = 0b01;
-}
-
-// Post-indexed loads
-class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 0; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 0; // P bit
- let Inst{27-26} = 0b01;
-}
-
-// Post-indexed stores
-class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 0; // P bit
- let Inst{27-26} = 0b01;
-}
-class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 1; // B bit
- let Inst{24} = 0; // P bit
+ bits<4> Rt;
let Inst{27-26} = 0b01;
+ let Inst{24} = isPre; // P bit
+ let Inst{22} = isByte; // B bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = isLd; // L bit
+ let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
}
// addrmode3 instructions
-class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern>;
-class AXI3<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern>;
-
-// loads
-class AI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AXI3ldh<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
-}
-class AI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
+ bits<14> addr;
+ bits<4> Rt;
let Inst{27-25} = 0b000;
-}
-class AXI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
-}
-class AI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+ opc, asm, cstr, pattern> {
+ bits<4> Rt;
let Inst{27-25} = 0b000;
-}
-class AXI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
-}
-class AI3ldd<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
+ let Inst{24} = isPre; // P bit
+ let Inst{21} = isPre; // W bit
+ let Inst{20} = op20; // L bit
+ let Inst{15-12} = Rt; // Rt
+ let Inst{7-4} = op;
+}
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM3 store w/ two operands: (GPR, am3offset)
+ bits<14> offset;
+ bits<4> Rt;
+ bits<4> Rn;
let Inst{27-25} = 0b000;
+ let Inst{23} = offset{8};
+ let Inst{22} = offset{9};
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
}
// stores
-class AI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AXI3sth<dag oops, dag iops, Format f, InstrItinClass itin,
- string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
-}
-class AI3std<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 1; // P bit
+ bits<14> addr;
+ bits<4> Rt;
let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{21} = 0; // W bit
+ let Inst{20} = 0; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
}
-// Pre-indexed loads
-class AI3ldhpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3ldshpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b000;
-}
-
-
// Pre-indexed stores
class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -781,60 +613,6 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{27-25} = 0b000;
}
-// Post-indexed loads
-class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 0; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{4} = 1;
- let Inst{5} = 1; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr,pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
- opc, asm, cstr, pattern> {
- let Inst{4} = 1;
- let Inst{5} = 0; // H bit
- let Inst{6} = 1; // S bit
- let Inst{7} = 1;
- let Inst{20} = 0; // L bit
- let Inst{21} = 0; // W bit
- let Inst{24} = 0; // P bit
- let Inst{27-25} = 0b000;
-}
-
// Post-indexed stores
class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -864,21 +642,17 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
}
// addrmode4 instructions
-class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
- asm, cstr, pattern> {
- let Inst{20} = 1; // L bit
- let Inst{22} = 0; // S bit
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+ bits<4> p;
+ bits<16> regs;
+ bits<4> Rn;
+ let Inst{31-28} = p;
let Inst{27-25} = 0b100;
-}
-class AXI4st<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
- : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
- asm, cstr, pattern> {
- let Inst{20} = 0; // L bit
let Inst{22} = 0; // S bit
- let Inst{27-25} = 0b100;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
}
// Unsigned multiply, multiply-accumulate instructions.
@@ -899,24 +673,65 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
}
// Most significant word multiply
-class AMul2I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
- let Inst{7-4} = 0b1001;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
let Inst{20} = 1;
let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiply w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
}
// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
-class AMulxyI<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{4} = 0;
let Inst{7} = 1;
let Inst{20} = 0;
let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
}
// Extend instructions.
@@ -924,16 +739,47 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
let Inst{27-20} = opcod;
}
// Misc Arithmetic instructions.
-class AMiscA1I<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<8> sh;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
}
//===----------------------------------------------------------------------===//
@@ -950,12 +796,9 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
}
//===----------------------------------------------------------------------===//
-//
// Thumb Instruction Format Definitions.
//
-// TI - Thumb instruction.
-
class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -966,6 +809,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb];
}
+// TI - Thumb instruction.
class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
: ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
@@ -986,6 +830,13 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
let Inst{12} = opcod3;
}
+// Move to/from coprocessor instructions
+class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>,
+ Encoding, Requires<[IsThumb, HasV6]> {
+ let Inst{31-28} = 0b1110;
+}
+
// BR_JT instructions
class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
list<dag> pattern>
@@ -999,7 +850,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let InOperandList = iops;
let AsmString = asm;
let Pattern = pattern;
- list<Predicate> Predicates = [IsThumb1Only];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
class T1I<dag oops, dag iops, InstrItinClass itin,
@@ -1008,9 +859,6 @@ class T1I<dag oops, dag iops, InstrItinClass itin,
class T1Ix2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
: Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
-class T1JTI<dag oops, dag iops, InstrItinClass itin,
- string asm, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
// Two-address instructions
class T1It<dag oops, dag iops, InstrItinClass itin,
@@ -1025,9 +873,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = !con(oops, (outs s_cc_out:$s));
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
- list<Predicate> Predicates = [IsThumb1Only];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
class T1sI<dag oops, dag iops, InstrItinClass itin,
@@ -1038,7 +886,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin,
class T1sIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$Rn = $Rdn", pattern>;
// Thumb1 instruction that can be predicated.
class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1047,9 +895,9 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
- list<Predicate> Predicates = [IsThumb1Only];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
class T1pI<dag oops, dag iops, InstrItinClass itin,
@@ -1060,17 +908,8 @@ class T1pI<dag oops, dag iops, InstrItinClass itin,
class T1pIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$Rn = $Rdn", pattern>;
-class T1pI1<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeT1_1, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI2<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeT1_2, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI4<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
class T1pIs<dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
@@ -1099,7 +938,7 @@ class T1DataProcessing<bits<4> opcode> : Encoding16 {
// A6.2.3 Special data instructions and branch and exchange encoding.
class T1Special<bits<4> opcode> : Encoding16 {
let Inst{15-10} = 0b010001;
- let Inst{9-6} = opcode;
+ let Inst{9-6} = opcode;
}
// A6.2.4 Load/store single data item encoding.
@@ -1107,12 +946,37 @@ class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
-class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
-class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
-class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+// 0b0110 => Immediate, 4 bytes
+// 0b1000 => Immediate, 2 bytes
+// 0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ T1LoadStore<0b0101, opcode> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{8-6} = addr{5-3}; // Rm
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ T1LoadStore<opA, {opB,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-6} = addr{7-3}; // imm5
+ let Inst{5-3} = addr{2-0}; // Rn
+ let Inst{2-0} = Rt;
+}
+
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
let Inst{15-12} = 0b1011;
@@ -1126,7 +990,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1134,16 +998,19 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
// input operand since by default it's a zero register. It will become an
// implicit def once it's "flipped".
-//
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{20} = s;
+
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1168,7 +1035,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let InOperandList = iops;
let AsmString = asm;
let Pattern = pattern;
- list<Predicate> Predicates = [IsThumb1Only];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
class T2I<dag oops, dag iops, InstrItinClass itin,
@@ -1186,17 +1053,23 @@ class T2Iso<dag oops, dag iops, InstrItinClass itin,
class T2Ipc<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
-class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
pattern> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<13> addr;
+ let Inst{31-25} = 0b1110100;
let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
+ let Inst{23} = addr{8};
let Inst{22} = 1;
let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{20} = isLoad;
+ let Inst{19-16} = addr{12-9};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = Rt2{3-0};
+ let Inst{7-0} = addr{7-0};
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1210,9 +1083,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
-class T2Ix2<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
+// Move to/from coprocessor instructions
+class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
+ let Inst{31-28} = 0b1111;
+}
// Two-address instructions
class T2XIt<dag oops, dag iops, InstrItinClass itin,
@@ -1227,7 +1102,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
: InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
@@ -1240,29 +1115,25 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
let Inst{10} = pre; // The P bit.
let Inst{8} = 1; // The W bit.
-}
-// Helper class for disassembly only
-// A6.3.16 & A6.3.17
-// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
-class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
- InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
- let Inst{31-27} = 0b11111;
- let Inst{26-24} = 0b011;
- let Inst{23} = long;
- let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ bits<9> addr;
+ let Inst{7-0} = addr{7-0};
+ let Inst{9} = addr{8}; // Sign bit
+
+ bits<4> Rt;
+ bits<4> Rn;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{19-16} = Rn{3-0};
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb1Only, HasV5T];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
}
// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb1Only];
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
@@ -1281,10 +1152,13 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
list<Predicate> Predicates = [HasVFP2];
}
@@ -1293,17 +1167,22 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
let OutOperandList = oops;
let InOperandList = iops;
let AsmString = asm;
let Pattern = pattern;
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
list<Predicate> Predicates = [HasVFP2];
}
class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern>;
+ opc, asm, "", pattern> {
+ let PostEncoderMethod = "VFPThumb2PostEncoder";
+}
// ARM VFP addrmode5 loads and stores
class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
@@ -1311,12 +1190,24 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Dd{4};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Dd{3-0};
+ let Inst{7-0} = addr{7-0}; // imm8
+
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
- // 64-bit loads & stores operate on both NEON and VFP pipelines.
+ // Loads & stores operate on both NEON and VFP pipelines.
let D = VFPNeonDomain;
}
@@ -1325,10 +1216,36 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
VFPLdStFrm, itin, opc, asm, "", pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<13> addr;
+
+ // Encode instruction operands.
+ let Inst{23} = addr{8}; // U (add = (U == '1'))
+ let Inst{22} = Sd{0};
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Sd{4-1};
+ let Inst{7-0} = addr{7-0}; // imm8
+
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+
+ // Loads & stores operate on both NEON and VFP pipelines.
+ let D = VFPNeonDomain;
+}
+
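
(Aside, not part of the patch: the ADI5/ASI5 encodings above split a 5-bit VFP register number in two different ways — Dd puts bits 3-0 in Inst{15-12} and bit 4 in Inst{22}, while Sd puts bits 4-1 in Inst{15-12} and bit 0 in Inst{22}. The struct and helper names below are illustrative.)

#include <cstdint>

struct VRegFields { uint32_t Bits15_12; uint32_t Bit22; };

// Double precision: Inst{15-12} = Dd{3-0}, Inst{22} = Dd{4}.
static VRegFields splitDReg(unsigned Dd) { return { Dd & 0xFu, (Dd >> 4) & 1u }; }

// Single precision: Inst{15-12} = Sd{4-1}, Inst{22} = Sd{0}.
static VRegFields splitSReg(unsigned Sd) { return { (Sd >> 1) & 0xFu, Sd & 1u }; }
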
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
}
// Load / store multiple
@@ -1336,21 +1253,40 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{12};
+ let Inst{15-12} = regs{11-8};
+ let Inst{7-0} = regs{7-0};
+
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
- let Inst{11-8} = 0b1011;
-
- // 64-bit loads & stores operate on both NEON and VFP pipelines.
- let D = VFPNeonDomain;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
}
class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ // Instruction operands.
+ bits<4> Rn;
+ bits<13> regs;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Rn;
+ let Inst{22} = regs{8};
+ let Inst{15-12} = regs{12-9};
+ let Inst{7-0} = regs{7-0};
+
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
}
// Double precision, unary
@@ -1358,10 +1294,21 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
: VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
}
@@ -1371,24 +1318,25 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
- let Inst{27-23} = opcod1;
- let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
-}
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
-// Double precision, binary, VML[AS] (for additional predicate)
-class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
- dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
- : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
let Inst{6} = op6;
let Inst{4} = op4;
- list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
// Single precision, unary
@@ -1396,16 +1344,27 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
: VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
}
-// Single precision unary, if no NEON
-// Same as ASuI except not available if NEON is enabled
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
string asm, list<dag> pattern>
@@ -1418,20 +1377,47 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
let Inst{6} = op6;
let Inst{4} = op4;
}
-// Single precision binary, if no NEON
-// Same as ASbI except not available if NEON is enabled
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
: ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
}
// VFP conversion instructions
@@ -1502,9 +1488,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
: InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(
- !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
- !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
@@ -1516,7 +1500,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
: InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
@@ -1531,6 +1515,25 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
let Inst{21-20} = op21_20;
let Inst{11-8} = op11_8;
let Inst{7-4} = op7_4;
+
+ let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+ bits<5> Vd;
+ bits<6> Rn;
+ bits<4> Rm;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{19-16} = Rn{3-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+ dt, asm, cstr, pattern> {
+ bits<3> lane;
}
class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
@@ -1541,11 +1544,22 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
list<Predicate> Predicates = [HasNEON];
}
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
+}
+
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
pattern> {
let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
}
class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1553,6 +1567,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
: NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
cstr, pattern> {
let Inst{31-25} = 0b1111001;
+ let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
}
// NEON "one register and a modified immediate" format.
@@ -1569,6 +1584,16 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
let Inst{6} = op6;
let Inst{5} = op5;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
}
// NEON 2 vector register format.
@@ -1584,6 +1609,15 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{11-7} = op11_7;
let Inst{6} = op6;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1599,6 +1633,15 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{11-7} = op11_7;
let Inst{6} = op6;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// NEON 2 vector register with immediate.
@@ -1612,6 +1655,17 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
let Inst{7} = op7;
let Inst{6} = op6;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
}
// NEON 3 vector register format.
@@ -1625,6 +1679,18 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{11-8} = op11_8;
let Inst{6} = op6;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// Same as N3V except it doesn't have a data type suffix.
@@ -1639,13 +1705,25 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
let Inst{11-8} = op11_8;
let Inst{6} = op6;
let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// NEON VMOVs between scalar and core registers.
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
"", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
@@ -1654,11 +1732,21 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(
- !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
- !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
+
+ let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+ bits<5> V;
+ bits<4> R;
+ bits<4> p;
+ bits<4> lane;
+
+ let Inst{31-28} = p{3-0};
+ let Inst{7} = V{4};
+ let Inst{19-16} = V{3-0};
+ let Inst{15-12} = R{3-0};
}
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
@@ -1687,6 +1775,15 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
let Inst{11-7} = 0b11000;
let Inst{6} = op6;
let Inst{4} = 0;
+
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<4> lane;
+
+ let Inst{22} = Vd{4};
+ let Inst{15-12} = Vd{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{3-0} = Vm{3-0};
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index ba228ff..6f48d96 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -33,13 +33,13 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
default: break;
case ARM::LDR_PRE:
case ARM::LDR_POST:
- return ARM::LDR;
+ return ARM::LDRi12;
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
return ARM::LDRH;
case ARM::LDRB_PRE:
case ARM::LDRB_POST:
- return ARM::LDRB;
+ return ARM::LDRBi12;
case ARM::LDRSH_PRE:
case ARM::LDRSH_POST:
return ARM::LDRSH;
@@ -48,39 +48,14 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return ARM::LDRSB;
case ARM::STR_PRE:
case ARM::STR_POST:
- return ARM::STR;
+ return ARM::STRi12;
case ARM::STRH_PRE:
case ARM::STRH_POST:
return ARM::STRH;
case ARM::STRB_PRE:
case ARM::STRB_POST:
- return ARM::STRB;
+ return ARM::STRBi12;
}
return 0;
}
-
-void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const {
- DebugLoc dl = Orig->getDebugLoc();
- unsigned Opcode = Orig->getOpcode();
- switch (Opcode) {
- default:
- break;
- case ARM::MOVi2pieces: {
- RI.emitLoadConstPool(MBB, I, dl,
- DestReg, SubIdx,
- Orig->getOperand(1).getImm(),
- (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
- Orig->getOperand(3).getReg());
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
- return;
- }
- }
-
- return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
-}
-
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
index 4563ffe..f2c7bdc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h
@@ -32,11 +32,6 @@ public:
// if there is no such opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index e66f9b9..c827ce3d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -58,10 +58,9 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -70,33 +69,35 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
- [SDNPInFlag]>;
+ [SDNPInGlue]>;
def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
- [SDNPInFlag]>;
+ [SDNPInGlue]>;
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
[SDNPHasChain]>;
@@ -106,40 +107,38 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
-def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
- [SDNPOutFlag]>;
-
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
- [SDNPOutFlag]>;
+ [SDNPOutGlue]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag, SDNPCommutative]>;
+ [SDNPOutGlue, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
-def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
- SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
-def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain]>;
-def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
- [SDNPHasChain]>;
-def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
@@ -147,34 +146,40 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
+def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
+def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
+def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -199,12 +204,6 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
-def rot_imm : PatLeaf<(i32 imm), [{
- int32_t v = (int32_t)N->getZExtValue();
- return v == 8 || v == 16 || v == 24;
-}]>;
-
/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
def imm1_15 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
@@ -217,12 +216,12 @@ def imm16_31 : PatLeaf<(i32 imm), [{
def so_imm_neg :
PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
}], so_imm_neg_XFORM>;
def so_imm_not :
PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
}], so_imm_not_XFORM>;
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
@@ -230,15 +229,6 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
}]>;
-/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
-/// e.g., 0xf000ffff
-def bf_inv_mask_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM::isBitFieldInvertedMask(N->getZExtValue());
-}] > {
- let PrintMethod = "printBitfieldInvMaskImmOperand";
-}
-
/// Split a 32-bit immediate into two 16 bit parts.
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
@@ -273,28 +263,103 @@ def sube_live_carry :
PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
[{return N->hasAnyUseOfValue(1);}]>;
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+ return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+ return hasNoVMLxHazardUse(N);
+}]>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
// Branch target.
-def brtarget : Operand<OtherVT>;
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+ let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+ let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+ // Encoded the same as branch targets.
+ let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
// A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+ let Name = "DPRRegList";
+ let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+ let Name = "SPRRegList";
+ let SuperClasses = [];
+}
+
def reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListAsmOperand;
let PrintMethod = "printRegisterList";
}
-// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
-def cpinst_operand : Operand<i32> {
- let PrintMethod = "printCPInstOperand";
+def dpr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = DPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
}
-def jtblock_operand : Operand<i32> {
- let PrintMethod = "printJTBlockOperand";
+def spr_reglist : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = SPRRegListAsmOperand;
+ let PrintMethod = "printRegisterList";
}
-def jt2block_operand : Operand<i32> {
- let PrintMethod = "printJT2BlockOperand";
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
}
// Local PC labels.
@@ -302,6 +367,22 @@ def pclabel : Operand<i32> {
let PrintMethod = "printPCLabel";
}
+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+ let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }]> {
+ let EncoderMethod = "getRotImmOpValue";
+}
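
(Aside, not part of the patch: a sketch of how the 8/16/24 rotate amount could pack into the two-bit rot field that the format classes later place in Inst{11-10}. The real logic lives in the getRotImmOpValue encoder method named above, which this diff does not show, so the divide-by-8 mapping is an assumption.)

#include <cassert>

// Assumed mapping: 0 -> 0b00, 8 -> 0b01, 16 -> 0b10, 24 -> 0b11.
static unsigned encodeRotImm(unsigned Rot) {
  assert((Rot == 0 || Rot == 8 || Rot == 16 || Rot == 24) && "invalid rotate amount");
  return Rot / 8;
}
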
+
// shift_imm: An integer that encodes a shift amount and the type of shift
// (currently either asr or lsl) using the same encoding used for the
// immediates in so_reg operands.
@@ -313,73 +394,120 @@ def shift_imm : Operand<i32> {
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
[shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+def shift_so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+// 8-bit immediate rotated by an arbitrary number of bits.
def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+ let EncoderMethod = "getSOImmOpValue";
let PrintMethod = "printSOImmOperand";
}
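
(Aside, not part of the patch: the test behind Pred_so_imm/getSOImmVal amounts to asking whether the value is some 8-bit constant rotated right by an even amount. A rough stand-alone sketch follows; the helper name is made up and the in-tree implementation differs in detail.)

#include <cstdint>

// True if V equals an 8-bit constant rotated right by an even amount.
static bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Rotl = R ? ((V << R) | (V >> (32 - R))) : V; // undo a rotate-right by R
    if (Rotl <= 0xFFu)
      return true;
  }
  return false;
}
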
// Break so_imm's up into two pieces. This handles immediates with up to 16
// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
// get the first/second pieces.
-def so_imm2part : Operand<i32>,
- PatLeaf<(imm), [{
+def so_imm2part : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
- }]> {
- let PrintMethod = "printSOImm2PartOperand";
-}
+}]>;
-def so_imm2part_1 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+ if (Subtarget->hasV6T2Ops())
+ return true;
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
-def so_imm2part_2 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
+ return (int32_t)N->getZExtValue() < 32;
}]>;
-def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
- return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue());
- }]> {
- let PrintMethod = "printSOImm2PartOperand";
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
+ return (int32_t)N->getZExtValue() < 32;
+}]> {
+ let EncoderMethod = "getImmMinusOneOpValue";
}
-def so_neg_imm2part_1 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+ let EncoderMethod = "getHiLo16ImmOpValue";
+}
-def so_neg_imm2part_2 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+ let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+ let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
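
(Aside, not part of the patch: the predicate above defers to ARM::isBitFieldInvertedMask. The idea is that the complement of the value must be a single contiguous run of ones, so AND-ing clears exactly one bitfield, as in the 0xf000ffff example. A self-contained sketch of that check, under the assumption the LLVM helper behaves this way.)

#include <cstdint>

// True if ~V is a non-empty contiguous run of ones, i.e. V clears one bitfield.
static bool isInvertedBitFieldMask(uint32_t V) {
  uint32_t M = ~V;
  if (M == 0)
    return false;                  // V == 0xffffffff clears nothing
  uint32_t Low = M & (0u - M);     // lowest set bit of M
  return ((M + Low) & M) == 0;     // adding it must clear the whole run
}
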
-/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
- return (int32_t)N->getZExtValue() < 32;
+/// lsb_pos_imm - position of the lsb, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
+ return isInt<5>(N->getSExtValue());
}]>;
+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, PatLeaf<(imm), [{
+ return N->getSExtValue() > 0 && N->getSExtValue() <= 32;
+}] > {
+ let EncoderMethod = "getMsbOpValue";
+}
+
// Define ARM specific addressing modes.
-// addrmode2 := reg +/- reg shop imm
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+ // 12-bit immediate operand. Note that instructions using this encode
+ // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+ // immediate values are as normal.
+
+ let EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
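
(Aside, not part of the patch: a hypothetical helper illustrating the #-0 convention in the comment above — the operand carries a signed offset, with subtract-of-zero flagged as INT32_MIN so it can still be told apart from #+0 when the U bit is emitted. The function is purely illustrative, not how the patch implements it.)

#include <cstdint>

// Fold an (add/sub, imm12) pair into the single signed value the operand carries.
static int32_t foldImm12Offset(bool IsSub, uint32_t Imm12) {
  if (IsSub && Imm12 == 0)
    return INT32_MIN;              // the "#-0" magic value
  return IsSub ? -static_cast<int32_t>(Imm12) : static_cast<int32_t>(Imm12);
}
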
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ let EncoderMethod = "getLdStSORegOpValue";
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
//
def addrmode2 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
def am2offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -389,22 +517,29 @@ def am2offset : Operand<i32>,
//
def addrmode3 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let EncoderMethod = "getAddrMode3OpValue";
let PrintMethod = "printAddrMode3Operand";
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
def am3offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
+ let EncoderMethod = "getAddrMode3OffsetOpValue";
let PrintMethod = "printAddrMode3OffsetOperand";
let MIOperandInfo = (ops GPR, i32imm);
}
-// addrmode4 := reg, <mode|W>
+// ldstm_mode := {ia, ib, da, db}
//
-def addrmode4 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode4", []> {
- let PrintMethod = "printAddrMode4Operand";
- let MIOperandInfo = (ops GPR:$addr, i32imm);
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+ let EncoderMethod = "getLdStmModeOpValue";
+ let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+ let Name = "MemMode5";
+ let SuperClasses = [];
}
// addrmode5 := reg +/- imm8*4
@@ -413,19 +548,32 @@ def addrmode5 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let PrintMethod = "printAddrMode5Operand";
let MIOperandInfo = (ops GPR:$base, i32imm);
+ let ParserMatchClass = MemMode5AsmOperand;
+ let EncoderMethod = "getAddrMode5OpValue";
}
-// addrmode6 := reg with optional writeback
+// addrmode6 := reg with optional alignment
//
def addrmode6 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode6", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
}
def am6offset : Operand<i32> {
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
+ let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
}
// addrmodepc := pc + reg
@@ -440,6 +588,28 @@ def nohash_imm : Operand<i32> {
let PrintMethod = "printNoHashImmediate";
}
+def CoprocNumAsmOperand : AsmOperandClass {
+ let Name = "CoprocNum";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+ let Name = "CoprocReg";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+ let PrintMethod = "printPImmediate";
+ let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+ let PrintMethod = "printCImmediate";
+ let ParserMatchClass = CoprocRegAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
include "ARMInstrFormats.td"
@@ -450,55 +620,93 @@ include "ARMInstrFormats.td"
/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
/// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
}
}
- def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
- let Inst{11-4} = 0b00000000;
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{25} = 0;
let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
}
- def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
}
}
/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
-let Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
- let Inst{20} = 1;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
}
- def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+ def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let isCommutable = Commutable;
- let Inst{11-4} = 0b00000000;
- let Inst{20} = 1;
let Inst{25} = 0;
- }
- def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
}
}
}
@@ -507,146 +715,233 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result, only implicitly sets CPSR.
let isCompare = 1, Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
- opc, "\t$a, $b",
- [(opnode GPR:$a, so_imm:$b)]> {
- let Inst{20} = 1;
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
- }
- def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
- opc, "\t$a, $b",
- [(opnode GPR:$a, GPR:$b)]> {
- let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
- let Inst{25} = 0;
- let isCommutable = Commutable;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = imm;
}
- def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
- opc, "\t$a, $b",
- [(opnode GPR:$a, so_reg:$b)]> {
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let isCommutable = Commutable;
+ let Inst{25} = 0;
let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
+ }
+ def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-0} = shift;
}
}
}
-/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
- IIC_iUNAr, opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
- let Inst{11-10} = 0b00;
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{3-0} = Rm;
}
- def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
- IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
}
}
-multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
- def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
- IIC_iUNAr, opc, "\t$dst, $src",
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
- let Inst{11-10} = 0b00;
let Inst{19-16} = 0b1111;
+ let Inst{11-10} = 0b00;
}
- def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
- IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
+ bits<2> rot;
let Inst{19-16} = 0b1111;
+ let Inst{11-10} = rot;
}
}
-/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
let Inst{11-10} = 0b00;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode GPR:$Rn,
+ (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{9-4} = 0b000111;
+ let Inst{3-0} = Rm;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
- i32imm:$rot),
- IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]>;
}
// For disassembly only.
-multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
- def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
- i32imm:$rot),
- IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
}
- def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM]> {
- let isCommutable = Commutable;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
- def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
// Carry setting variants
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
- def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+ def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
let Inst{20} = 1;
let Inst{25} = 1;
}
- def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+ def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
let Inst{25} = 0;
}
- def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+ def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -654,6 +949,62 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AI2ldst<0b010, 0, isByte, (outs),
+ (ins GPR:$Rt, addrmode_imm12:$addr),
+ AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+ AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+ [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+ bits<4> Rt;
+ bits<17> shift;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = shift{11-0};
+ }
+}
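A brief sketch of how the new AI_ldr1/AI_str1 multiclasses are used; the concrete defm lines appear in the Load/Store section of this same patch:

  defm LDR  : AI_ldr1<0, "ldr",  IIC_iLoad_r,  IIC_iLoad_si,
                      UnOpFrag<(load node:$Src)>>;
  defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
                      BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;

Each defm expands into an i12 (immediate-offset) and an rs (register-shifted) variant, e.g. LDRi12 and LDRrs, with the bits<17> addr/shift operand packing the U bit into bit 12, Rn into bits 16-13, and the imm12 or shifted-register field into bits 11-0, as annotated above.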
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -669,8 +1020,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let neverHasSideEffects = 1, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
- i32imm:$size), NoItinerary,
- "${instid:label} ${cpidx:cpentry}", []>;
+ i32imm:$size), NoItinerary, []>;
// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
// from removing one half of the matched pairs. That breaks PEI, which assumes
@@ -678,12 +1028,10 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
- "${:comment} ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
- "${:comment} ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
}
@@ -691,6 +1039,7 @@ def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000000;
}
@@ -698,6 +1047,7 @@ def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000001;
}
@@ -705,6 +1055,7 @@ def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000010;
}
@@ -712,6 +1063,7 @@ def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000011;
}
@@ -719,14 +1071,22 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
"\t$dst, $a, $b",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{27-20} = 0b01101000;
let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
}
def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000100;
}
@@ -735,154 +1095,174 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
let Inst{27-20} = 0b00010010;
let Inst{7-4} = 0b0111;
}
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode from Inst{4-0}
-// opt{5} = changemode from Inst{17}
-// opt{8-6} = AIF from Inst{8-6}
-// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+ : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+ [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
let Inst{31-28} = 0b1111;
let Inst{27-20} = 0b00010000;
- let Inst{16} = 0;
- let Inst{5} = 0;
+ let Inst{19-18} = imod;
+ let Inst{17} = M; // Enabled if mode is set;
+ let Inst{16} = 0;
+ let Inst{8-6} = iflags;
+ let Inst{5} = 0;
+ let Inst{4-0} = mode;
}
+let M = 1 in
+ def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+ def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
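Assuming the imod and iflags operands print in the usual ARM syntax (ie/id and a subset of a, i, f), the three variants roughly cover the standard assembly forms; this mapping is illustrative only:

  cpsid if, #16      @ CPS3p: imod + iflags + mode
  cpsie if           @ CPS2p: imod + iflags, no mode
  cps   #16          @ CPS1p: mode only (M = 1, imod/iflags zero)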
// Preload signals the memory system of possible future data/instruction access.
// These are for disassembly only.
-//
-// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass APreLoad<bit data, bit read, string opc> {
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
- def i : AXI<(outs), (ins GPR:$base, neg_zero:$imm), MiscFrm, NoItinerary,
- !strconcat(opc, "\t[$base, $imm]"), []> {
+ def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$addr"),
+ [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+ bits<4> Rt;
+ bits<17> addr;
let Inst{31-26} = 0b111101;
let Inst{25} = 0; // 0 for immediate form
let Inst{24} = data;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
let Inst{22} = read;
let Inst{21-20} = 0b01;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = addr{11-0}; // imm12
}
- def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary,
- !strconcat(opc, "\t$addr"), []> {
+ def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+ !strconcat(opc, "\t$shift"),
+ [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+ bits<17> shift;
let Inst{31-26} = 0b111101;
let Inst{25} = 1; // 1 for register form
let Inst{24} = data;
+ let Inst{23} = shift{12}; // U (add = ('U' == 1))
let Inst{22} = read;
let Inst{21-20} = 0b01;
- let Inst{4} = 0;
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = shift{11-0};
}
}
-defm PLD : APreLoad<1, 1, "pld">;
-defm PLDW : APreLoad<1, 0, "pldw">;
-defm PLI : APreLoad<0, 1, "pli">;
-
-def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
- let Inst{31-28} = 0b1111;
- let Inst{27-20} = 0b00010000;
- let Inst{16} = 1;
- let Inst{9} = 1;
- let Inst{7-4} = 0b0000;
-}
+defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
-def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle",
- [/* For disassembly only; pattern left blank */]>,
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end",
+ [/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
- let Inst{31-28} = 0b1111;
- let Inst{27-20} = 0b00010000;
- let Inst{16} = 1;
- let Inst{9} = 0;
- let Inst{7-4} = 0b0000;
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
}
def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV7]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{7-4} = 0b1111;
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
}
// A5.4 Permanently UNDEFINED instructions.
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
let isBarrier = 1, isTerminator = 1 in
-def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
- ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
Requires<[IsARM]> {
- let Inst{27-25} = 0b011;
- let Inst{24-20} = 0b11111;
- let Inst{7-5} = 0b111;
- let Inst{4} = 0b1;
+ let Inst = 0xe7ffdefe;
}
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
-def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
- Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a",
- [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ Size4Bytes, IIC_iALUr,
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
-def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
- [(set GPR:$dst, (load addrmodepc:$addr))]>;
+def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_r,
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
-def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
- [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
-def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
- [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
-def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
- [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
-def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
- [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iLoad_bh_r,
+ [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
-def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr",
- [(store GPR:$src, addrmodepc:$addr)]>;
+def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
-def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
- [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ addrmodepc:$addr)]>;
-def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
- [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
- Pseudo, IIC_iALUi,
- "adr$p\t$dst, #$label", []>;
-
-} // neverHasSideEffects
-def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
- (ins i32imm:$label, nohash_imm:$id, pred:$p),
- Pseudo, IIC_iALUi,
- "adr$p\t$dst, #${label}_${id}", []> {
- let Inst{25} = 1;
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+ bits<4> Rd;
+ bits<12> label;
+ let Inst{27-25} = 0b001;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = label;
}
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -893,159 +1273,139 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]>,
Requires<[IsARM, HasV4T]> {
- let Inst{3-0} = 0b1110;
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ let Inst{27-0} = 0b0001001011111111111100011110;
}
// ARMV4 only
- def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"mov", "\tpc, lr", [(ARMretflag)]>,
Requires<[IsARM, NoV4T]> {
- let Inst{11-0} = 0b000000001110;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
+ let Inst{27-0} = 0b0001101000001111000000001110;
}
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// ARMV4T and above
- def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+ def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
[(brind GPR:$dst)]>,
Requires<[IsARM, HasV4T]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
// ARMV4 only
- def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst",
- [(brind GPR:$dst)]>,
- Requires<[IsARM, NoV4T]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
- let Inst{31-28} = 0b1110;
- }
+ // FIXME: We would really like to define this as a vanilla ARMPat like:
+ // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
+ // With that, however, we can't set isBranch, isTerminator, etc..
+ def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
+ Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
+ Requires<[IsARM, NoV4T]>;
}
-// FIXME: remove when we have a way to marking a MI with these properties.
-// FIXME: Should pc be an implicit operand like PICADD, etc?
-let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
- hasExtraDefRegAllocReq = 1 in
- def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_Br,
- "ldm${addr:submode}${p}\t$addr!, $dsts",
- "$addr.addr = $wb", []>;
-
-// On non-Darwin platforms R9 is callee-saved.
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
- D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
- def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl\t${func:call}",
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [SP] in {
+ def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsARM, IsNotDarwin]> {
let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
}
- def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", "\t${func:call}",
+ def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]>;
+ Requires<[IsARM, IsNotDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
// ARMv5T and above
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
Requires<[IsARM, HasV5T, IsNotDarwin]> {
- let Inst{7-4} = 0b0011;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
}
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
- IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsNotDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- }
+ def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsNotDarwin]>;
// ARMv4
- def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
- IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsNotDarwin]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
- }
+ def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsNotDarwin]>;
}
-// On Darwin R9 is call-clobbered.
let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
- D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
- def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl\t${func:call}",
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [R7, SP] in {
+ def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+ IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
let Inst{31-28} = 0b1110;
+ bits<24> func;
+ let Inst{23-0} = func;
}
- def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", "\t${func:call}",
+ def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+ IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsDarwin]>;
+ Requires<[IsARM, IsDarwin]> {
+ bits<24> func;
+ let Inst{23-0} = func;
+ }
// ARMv5T and above
def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
- let Inst{7-4} = 0b0011;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{31-4} = 0b1110000100101111111111110011;
+ let Inst{3-0} = func;
}
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
- IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- }
+ def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, HasV4T, IsDarwin]>;
// ARMv4
- def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
- IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsDarwin]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
- }
+ def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+ Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ Requires<[IsARM, NoV4T, IsDarwin]>;
}
// Tail calls.
+// FIXME: These should probably be xformed into the non-TC versions of the
+// instructions as part of MC lowering.
+// FIXME: These seem to be used for both Thumb and ARM instruction selection.
+// Thumb should have its own version since the instruction is actually
+// different, even though the mnemonic is the same.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin versions.
let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1053,29 +1413,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
D27, D28, D29, D30, D31, PC],
Uses = [SP] in {
- def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
- Pseudo, IIC_Br,
- "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
- def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
- Pseudo, IIC_Br,
- "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
IIC_Br, "b\t$dst @ TAILCALL",
- []>, Requires<[IsDarwin]>;
+ []>, Requires<[IsARM, IsDarwin]>;
def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
IIC_Br, "b.w\t$dst @ TAILCALL",
- []>, Requires<[IsDarwin]>;
+ []>, Requires<[IsThumb, IsDarwin]>;
def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
[]>, Requires<[IsDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
}
@@ -1085,13 +1442,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
D27, D28, D29, D30, D31, PC],
Uses = [SP] in {
- def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
- Pseudo, IIC_Br,
- "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
- def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
- Pseudo, IIC_Br,
- "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
IIC_Br, "b\t$dst @ TAILCALL",
@@ -1104,10 +1459,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
[]>, Requires<[IsNotDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
}
}
@@ -1117,48 +1471,40 @@ let isBranch = 1, isTerminator = 1 in {
let isBarrier = 1 in {
let isPredicable = 1 in
def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
- "b\t$target", [(br bb:$target)]>;
+ "b\t$target", [(br bb:$target)]> {
+ bits<24> target;
+ let Inst{31-28} = 0b1110;
+ let Inst{23-0} = target;
+ }
- let isNotDuplicable = 1, isIndirectBranch = 1 in {
- def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target$jt",
- [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{20} = 0; // S Bit
- let Inst{24-21} = 0b1101;
- let Inst{27-25} = 0b000;
- }
- def BR_JTm : JTI<(outs),
- (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr\tpc, $target$jt",
- [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
- imm:$id)]> {
- let Inst{15-12} = 0b1111;
- let Inst{20} = 1; // L bit
- let Inst{21} = 0; // W bit
- let Inst{22} = 0; // B bit
- let Inst{24} = 1; // P bit
- let Inst{27-25} = 0b011;
- }
- def BR_JTadd : JTI<(outs),
- (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add\tpc, $target, $idx$jt",
- [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
- imm:$id)]> {
- let Inst{15-12} = 0b1111;
- let Inst{20} = 0; // S bit
- let Inst{24-21} = 0b0100;
- let Inst{27-25} = 0b000;
- }
- } // isNotDuplicable = 1, isIndirectBranch = 1
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : ARMPseudoInst<(outs),
+ (ins GPR:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+ // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+ // into i12 and rs suffixed versions.
+ def BR_JTm : ARMPseudoInst<(outs),
+ (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : ARMPseudoInst<(outs),
+ (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+ } // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
- def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
IIC_Br, "b", "\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
+ }
}
// Branch and Exchange Jazelle -- for disassembly only
@@ -1172,271 +1518,303 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Secure Monitor Call is a system instruction -- for disassembly only
def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
[/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0110;
- let Inst{7-4} = 0b0111;
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
}
// Supervisor Call (Software Interrupt) -- for disassembly only
-let isCall = 1 in {
+let isCall = 1, Uses = [SP] in {
def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
}
// Store Return State is a system instruction -- for disassembly only
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
- NoItinerary, "srs${addr:submode}\tsp!, $mode",
+let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp!, $mode",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b110; // W = 1
}
-def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
- NoItinerary, "srs${addr:submode}\tsp, $mode",
+def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+ NoItinerary, "srs${amode}\tsp, $mode",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b100; // W = 0
}
// Return From Exception is a system instruction -- for disassembly only
-def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
- NoItinerary, "rfe${addr:submode}\t$base!",
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b011; // W = 1
}
-def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
- NoItinerary, "rfe${addr:submode}\t$base",
+def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+ NoItinerary, "rfe${amode}\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b001; // W = 0
}
+} // isCodeGenOnly = 1
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", "\t$dst, $addr",
- [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
isReMaterializable = 1 in
-def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", "\t$dst, $addr", []>;
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+ []> {
+ bits<4> Rt;
+ bits<17> addr;
+ let Inst{23} = addr{12}; // U (add = ('U' == 1))
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
// Loads with zero extension
-def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrh", "\t$dst, $addr",
- [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-
-def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldrb", "\t$dst, $addr",
- [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
// Loads with sign extension
-def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrsh", "\t$dst, $addr",
- [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
-
-def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrsb", "\t$dst, $addr",
- [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+ [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+ isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring?
+// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
+// how to represent that such that tblgen is happy and we don't
+// mark this codegen only?
// Load doubleword
-def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrd", "\t$dst1, $addr",
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+ (ins addrmode3:$addr), LdMiscFrm,
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
+}
// Indexed loads
-def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+ def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ }
+ def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am2offset:$offset),
+ IndexModePost, LdFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<14> offset;
+ bits<4> Rn;
+ let Inst{25} = offset{13};
+ let Inst{23} = offset{12};
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = offset{11-0};
+ }
+}
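For reference, this pre/post-indexed multiclass is instantiated a few lines below (defm LDR / defm LDRB), giving, for example, LDR_PRE and LDR_POST, which correspond to the usual writeback forms:

  ldr r0, [r1, #8]!   @ LDR_PRE:  pre-indexed, $addr.base tied to $Rn_wb
  ldr r0, [r1], #8    @ LDR_POST: post-indexed, $Rn tied to $Rn_wb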
-// For disassembly only
-def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
- "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
- Requires<[IsARM, HasV5TE]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
-// For disassembly only
-def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
- "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
- Requires<[IsARM, HasV5TE]>;
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+ def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ }
+ def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, itin,
+ opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+ }
+}
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+} // mayLoad = 1, neverHasSideEffects = 1
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
-
-def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), IndexModeNone,
+ LdFrm, IIC_iLoad_ru,
"ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-
-def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), IndexModeNone,
+ LdFrm, IIC_iLoad_bh_ru,
"ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-
-def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-
-def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-
-def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
+}
// Store
-def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", "\t$src, $addr",
- [(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
- IIC_iStorer, "strh", "\t$src, $addr",
- [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
-
-def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "strb", "\t$src, $addr",
- [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+ IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+ [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
- StMiscFrm, IIC_iStorer,
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
+ isCodeGenOnly = 1 in // $src2 doesn't exist in asm string
+def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+ StMiscFrm, IIC_iStore_d_r,
"strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
-def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, am2offset:$offset),
- StFrm, IIC_iStoreru,
- "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
- [(set GPR:$base_wb,
- (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STR_POST : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
- "str", "\t$src, [$base], $offset", "$base = $base_wb",
- [(set GPR:$base_wb,
- (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
- "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
- [(set GPR:$base_wb,
- (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
-
-def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
- "strh", "\t$src, [$base], $offset", "$base = $base_wb",
- [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
- GPR:$base, am3offset:$offset))]>;
-
-def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
- "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
- [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
- GPR:$base, am2offset:$offset))]>;
-
-def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
- "strb", "\t$src, [$base], $offset", "$base = $base_wb",
- [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
- GPR:$base, am2offset:$offset))]>;
+def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePre, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
+ GPR:$Rn, am2offset:$offset))]>;
+
+def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePre, StMiscFrm, IIC_iStore_ru,
+ "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb,
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
+
+def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
+ IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
+ "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
+ GPR:$Rn, am3offset:$offset))]>;
// For disassembly only
def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$src1, $src2, [$base, $offset]!",
"$base = $base_wb", []>;
// For disassembly only
def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$src1, $src2, [$base], $offset",
"$base = $base_wb", []>;
// STRT, STRBT, and STRHT are for disassembly only.
-def STRT : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
- "strt", "\t$src, [$base], $offset", "$base = $base_wb",
+def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
+ IndexModeNone, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
}
-def STRBT : AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
- "strbt", "\t$src, [$base], $offset", "$base = $base_wb",
+def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
+ (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+ IndexModeNone, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
}
def STRHT: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_bh_ru,
"strht", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
@@ -1446,103 +1824,212 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
// Load / store multiple Instructions.
//
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
-
-def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr!, $dsts",
- "$addr.addr = $wb", []>;
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iStorem,
- "stm${addr:submode}${p}\t$addr, $srcs", "", []>;
-
-def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iStorem,
- "stm${addr:submode}${p}\t$addr!, $srcs",
- "$addr.addr = $wb", []>;
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ def IA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DA :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DA_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b00; // Decrement After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def IB :
+ AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeNone, f, itin,
+ !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def IB_UPD :
+ AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ IndexModeUpd, f, itin_upd,
+ !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b11; // Increment Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+}
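To summarize the encoding choices in this multiclass: Inst{24-23} selects the addressing mode (da = 0b00, ia = 0b01, db = 0b10, ib = 0b11), bit 21 is the writeback flag (the *_UPD variants), and bit 20 is the L bit supplied by the instantiation. The defm lines just below create the full LDM/STM families; a sketch of the resulting assembly-to-record mapping:

  ldmia r4!, {r0, r1, r2}    @ LDMIA_UPD  (L = 1, writeback)
  stmdb sp!, {r4-r6, lr}     @ STMDB_UPD  (L = 0, writeback)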
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// Load / Store Multiple Mnemonic Aliases
+def : MnemonicAlias<"ldm", "ldmia">;
+def : MnemonicAlias<"stm", "stmia">;
+
+// FIXME: remove when we have a way to marking a MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+// FIXME: Should be a pseudo-instruction.
+def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
+ "ldmia${p}\t$Rn!, $regs",
+ "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = 1; // Load
+}
//===----------------------------------------------------------------------===//
// Move Instructions.
//
let neverHasSideEffects = 1 in
-def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
}
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
-def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
- IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
}
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
- "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = src;
let Inst{25} = 0;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
- "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
DPFrm, IIC_iMOVi,
- "movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]>,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
}
-let Constraints = "$src = $dst" in
-def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm),
DPFrm, IIC_iMOVi,
- "movt", "\t$dst, $imm",
- [(set GPR:$dst,
+ "movt", "\t$Rd, $imm",
+ [(set GPR:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
}
+def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+} // Constraints
+
def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
Requires<[IsARM, HasV6T2]>;
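As a worked example of how the movw/movt pair composes (the "$src = $Rd" constraint keeps the low half intact across the movt), an arbitrary 32-bit constant can be built in two instructions on v6T2 and later:

  movw r0, #0x5678          @ MOVi16:  r0 = 0x00005678
  movt r0, #0x1234          @ MOVTi16: r0 = 0x12345678

which is what the (or (and GPR:$src, 0xffff), lo16AllZero:$imm) pattern above selects for the high half.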
let Uses = [CPSR] in
-def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
- "mov", "\t$dst, $src, rrx",
- [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
// These aren't really mov instructions, but we have to define them this way
// due to flag operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
-def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
@@ -1551,31 +2038,31 @@ def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
// Sign extenders
-defm SXTB : AI_unary_rrot<0b01101010,
- "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm SXTH : AI_unary_rrot<0b01101011,
- "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm SXTAB : AI_bin_rrot<0b01101010,
+defm SXTAB : AI_exta_rrot<0b01101010,
"sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm SXTAH : AI_bin_rrot<0b01101011,
+defm SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
// For disassembly only
-defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">;
+defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
// For disassembly only
-defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">;
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm UXTB : AI_unary_rrot<0b01101110,
- "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm UXTH : AI_unary_rrot<0b01101111,
- "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm UXTB16 : AI_unary_rrot<0b01101100,
- "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+defm UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -1586,33 +2073,49 @@ defm UXTB16 : AI_unary_rrot<0b01101100,
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
-defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
// This isn't safe in general: the add is two 16-bit units, not a 32-bit add.
// For disassembly only
-defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
-def SBFX : I<(outs GPR:$dst),
- (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "sbfx", "\t$dst, $src, $lsb, $width", "", []>,
+def SBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
let Inst{27-21} = 0b0111101;
let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
}
-def UBFX : I<(outs GPR:$dst),
- (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "ubfx", "\t$dst, $src, $lsb, $width", "", []>,
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
let Inst{27-21} = 0b0111111;
let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
}
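SBFX/UBFX now take an imm0_31_m1 width operand, i.e. the value placed in Inst{20-16} is width-1 rather than width. For reference, the extraction they perform follows the usual ARM semantics; the C sketch below is illustrative and not taken from this patch.

#include <stdint.h>

/* UBFX Rd, Rn, #lsb, #width: unsigned bitfield extract.
 * Note: the encoded Inst{20-16} field holds width-1 (imm0_31_m1 above). */
static uint32_t ubfx(uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
    return (rn >> lsb) & mask;
}

/* SBFX is the same extract, sign-extended from bit width-1. */
static int32_t sbfx(uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t v = ubfx(rn, lsb, width);
    uint32_t sign = 1u << (width - 1);
    return (int32_t)((v ^ sign) - sign);
}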
//===----------------------------------------------------------------------===//
@@ -1620,100 +2123,166 @@ def UBFX : I<(outs GPR:$dst),
//
defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+
+// ADC and SBC with 's' bit set.
defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
- let Inst{25} = 1;
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
}
// The reg/reg form is only defined for the disassembler; for codegen it is
// equivalent to SUBrr.
-def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, "rsb", "\t$dst, $a, $b",
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]> {
- let Inst{25} = 0;
- let Inst{11-4} = 0b00000000;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
-def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
- let Inst{25} = 0;
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
// RSB with 's' bit set.
-let Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsbs", "\t$dst, $a, $b",
- [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
- let Inst{20} = 1;
- let Inst{25} = 1;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
- [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
- let Inst{20} = 1;
- let Inst{25} = 0;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{25} = 1;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
}
// The reg/reg form is only defined for the disassembler; for codegen it is
// equivalent to SUBrr.
-def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]> {
- let Inst{25} = 0;
- let Inst{11-4} = 0b00000000;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
-def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{25} = 0;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
// FIXME: Allow these to be predicated.
-let Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{20} = 1;
- let Inst{25} = 1;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
}
-def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{20} = 1;
- let Inst{25} = 0;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
@@ -1740,111 +2309,166 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
- list<dag> pattern = [/* For disassembly only; pattern left blank */]>
- : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b", pattern> {
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+ : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{27-20} = op27_20;
- let Inst{7-4} = op7_4;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
}
// Saturating add/subtract -- for disassembly only
-def QADD : AAI<0b00010000, 0b0101, "qadd",
- [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
-def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
-def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
-def QASX : AAI<0b01100010, 0b0011, "qasx">;
-def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
-def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
-def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub",
- [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
-def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
-def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
-def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">;
-def UQASX : AAI<0b01100110, 0b0011, "uqasx">;
-def UQSAX : AAI<0b01100110, 0b0101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
-def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">;
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+ (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
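QADD above is matched to int_arm_qadd, a signed saturating 32-bit add; the q*16/q*8 and uq* forms apply the same idea lane-wise (and unsigned). A rough model of the scalar case, ignoring the Q flag side effect of the real instruction:

#include <stdint.h>

/* Signed saturating 32-bit add, as performed by QADD (Q flag not modeled). */
static int32_t qadd32(int32_t a, int32_t b) {
    int64_t sum = (int64_t)a + (int64_t)b;
    if (sum > INT32_MAX) return INT32_MAX;
    if (sum < INT32_MIN) return INT32_MIN;
    return (int32_t)sum;
}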
// Signed/Unsigned add/subtract -- for disassembly only
-def SASX : AAI<0b01100001, 0b0011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
-def SADD8 : AAI<0b01100001, 0b1001, "sadd8">;
-def SSAX : AAI<0b01100001, 0b0101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
-def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">;
-def UASX : AAI<0b01100101, 0b0011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
-def UADD8 : AAI<0b01100101, 0b1001, "uadd8">;
-def USAX : AAI<0b01100101, 0b0101, "usax">;
-def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
-def USUB8 : AAI<0b01100101, 0b1111, "usub8">;
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
// Signed/Unsigned halving add/subtract -- for disassembly only
-def SHASX : AAI<0b01100011, 0b0011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
-def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">;
-def SHSAX : AAI<0b01100011, 0b0101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
-def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">;
-def UHASX : AAI<0b01100111, 0b0011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
-def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">;
-def UHSAX : AAI<0b01100111, 0b0101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
-def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">;
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
- "\t$dst, $a, $b", []>,
+ "\t$Rd, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{27-20} = 0b01111000;
let Inst{15-12} = 0b1111;
let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
}
-def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
- "\t$dst, $a, $b, $acc", []>,
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
let Inst{27-20} = 0b01111000;
let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
}
// Signed/Unsigned saturate -- for disassembly only
-def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
let Inst{27-21} = 0b0110101;
let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{3-0} = Rn;
}
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
- NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
let Inst{27-20} = 0b01101010;
- let Inst{7-4} = 0b0011;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
-def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
- SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
let Inst{27-21} = 0b0110111;
let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
- NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
+ NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
let Inst{27-20} = 0b01101110;
- let Inst{7-4} = 0b0011;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
@@ -1855,52 +2479,100 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
//
defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
-defm ANDS : AI1_bin_s_irs<0b0000, "and",
- BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfc", "\t$dst, $imm", "$src = $dst",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
let Inst{27-21} = 0b0111110;
let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $val, $imm", "$src = $dst",
- [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+ lsb_pos_imm:$lsb, width_imm:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+ []>, Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{20-16} = width; // Custom encoder => lsb+width-1
+ let Inst{3-0} = Rn;
}
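The "Custom encoder => lsb+width-1" note above reflects that the hardware field in Inst{20-16} stores the msb of the inserted bitfield rather than its width. A small sketch of that conversion (the helper name is invented):

#include <assert.h>

/* Convert the (lsb, width) pair accepted by the 4-operand BFI form into the
 * msb value that the encoding stores in Inst{20-16}.                       */
static unsigned bfi_msb_field(unsigned lsb, unsigned width) {
    assert(width >= 1 && lsb + width <= 32);
    return lsb + width - 1;
}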
-def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
let Inst{11-4} = 0b00000000;
-}
-def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
- IIC_iMOVsr, "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
+}
+def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+ IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
let Inst{25} = 0;
-}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
- IIC_iMOVi, "mvn", "\t$dst, $imm",
- [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
- let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
}
def : ARMPat<(and GPR:$src, so_imm_not:$imm),
@@ -1909,247 +2581,299 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
//===----------------------------------------------------------------------===//
// Multiply Instructions.
//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
-let isCommutable = 1 in
-def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-
-def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-
-def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
- Requires<[IsARM, HasV6T2]>;
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, NoV6]>;
+
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, NoV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> Ra;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
// Extra precision multiplies with low / high results
+
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "smull", "\t$ldst, $hdst, $a, $b", []>;
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL64, []>,
+ Requires<[IsARM, NoV6]>;
-def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "umull", "\t$ldst, $hdst, $a, $b", []>;
+def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMUL64, []>,
+ Requires<[IsARM, NoV6]>;
}
-// Multiply + accumulate
-def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "smlal", "\t$ldst, $hdst, $a, $b", []>;
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
-def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umlal", "\t$ldst, $hdst, $a, $b", []>;
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+}
-def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umaal", "\t$ldst, $hdst, $a, $b", []>,
+// Multiply + accumulate
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ Size4Bytes, IIC_iMAC64, []>,
+ Requires<[IsARM, NoV6]>;
+
+}
+
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdLo;
+ let Inst{15-12} = RdHi;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
} // neverHasSideEffects
// Most significant word multiply
-def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0001;
let Inst{15-12} = 0b1111;
}
-def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011; // R = 1
let Inst{15-12} = 0b1111;
}
-def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0001;
-}
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
-def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011; // R = 1
-}
+ Requires<[IsARM, HasV6]>;
-def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1101;
-}
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
-def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1111; // R = 1
-}
+ Requires<[IsARM, HasV6]>;
multiclass AI_smul<string opc, PatFrag opnode> {
- def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
-
- def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
-
- def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
}
multiclass AI_smla<string opc, PatFrag opnode> {
- def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
-
- def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
-
- def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
+ def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
}
defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
-}
+ Requires<[IsARM, HasV5TE]>;
// Helper class for AI_smld -- for disassembly only
-class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
- InstrItinClass itin, string opc, string asm>
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{4} = 1;
let Inst{5} = swap;
let Inst{6} = sub;
@@ -2157,21 +2881,46 @@ class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
let Inst{21-20} = 0b00;
let Inst{22} = long;
let Inst{27-23} = 0b01110;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
}
multiclass AI_smld<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+ def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
- def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+ def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
- def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
- NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
+ def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
- def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+ def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
}
@@ -2180,16 +2929,10 @@ defm SMLS : AI_smld<1, "smls">;
multiclass AI_sdml<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
- let Inst{15-12} = 0b1111;
- }
-
- def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
- let Inst{15-12} = 0b1111;
- }
-
+ def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -2199,55 +2942,35 @@ defm SMUS : AI_sdml<1, "smus">;
// Misc. Arithmetic Instructions.
//
-def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src",
- [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
- let Inst{7-4} = 0b0001;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def RBIT : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{7-4} = 0b0011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", "\t$dst, $src",
- [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev16", "\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "revsh", "\t$dst, $src",
- [(set GPR:$dst,
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
+ (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+ (shl GPR:$Rm, (i32 8))), i16))]>,
+ Requires<[IsARM, HasV6]>;
def lsl_shift_imm : SDNodeXForm<imm, [{
unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
@@ -2258,21 +2981,19 @@ def lsl_amt : PatLeaf<(i32 imm), [{
return (N->getZExtValue() < 32);
}], lsl_shift_imm>;
-def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, lsl_amt:$sh),
- 0xFFFF0000)))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{6-4} = 0b001;
-}
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+ (and (shl GPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
- (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+ (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+ (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
def asr_shift_imm : SDNodeXForm<imm, [{
unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
@@ -2285,15 +3006,13 @@ def asr_amt : PatLeaf<(i32 imm), [{
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
-def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, asr_amt:$sh),
- 0xFFFF)))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{6-4} = 0b101;
-}
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+ (and (sra GPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -2308,10 +3027,19 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
//
defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
-// and comparison with 0. These two pieces of code should give identical
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+ (CMPri GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+ (CMPrr GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+ (CMPrs GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction for comparisons
+// with 0. One would expect these two pieces of code to give identical
// results:
//
// rsbs r1, r1, 0
@@ -2321,7 +3049,7 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
// mov r0, #1
//
// and:
-//
+//
// cmn r0, r1
// mov r0, #0
// it ls
@@ -2336,20 +3064,16 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
// never a "carry" when this AddWithCarry is performed (because the "carry bit"
// parameter to AddWithCarry is defined as 0).
//
-// The AddWithCarry in the CMP case seems to be relying upon the identity:
-//
-// ~x + 1 = -x
-//
-// However when x is 0 and unsigned, this doesn't hold:
+// When x is 0 and unsigned:
//
// x = 0
// ~x = 0xFFFF FFFF
// ~x + 1 = 0x1 0000 0000
// (-x = 0) != (0x1 0000 0000 = ~x + 1)
//
-// Therefore, we should disable *all* versions of CMN, especially when comparing
-// against zero, until we can limit when the CMN instruction is used (when we
-// know that the RHS is not 0) or when we have a hardware fix for this.
+// Therefore, we should disable CMN when comparing against zero until we can
+// limit when the CMN instruction is used (i.e. when we know that the RHS is
+// not 0, or when the comparison does not read the 'carry' flag).
//
// (See the ARM docs for the "AddWithCarry" pseudo-code.)
//
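To make the carry-flag point above concrete: modelling AddWithCarry per the ARM pseudo-code, the subtract-style compare of the first sequence and the CMN of the second produce different carry-out when the operand is 0, which is why conditions such as 'ls' that read the carry flag can diverge. The snippet below is illustrative only and not part of this file.

#include <stdint.h>
#include <stdio.h>

/* Carry-out of AddWithCarry(x, y, carry_in), per the ARM ARM pseudo-code. */
static unsigned add_with_carry_out(uint32_t x, uint32_t y, unsigned cin) {
    uint64_t sum = (uint64_t)x + (uint64_t)y + cin;
    return (unsigned)(sum >> 32);          /* 1 if the unsigned sum wrapped */
}

int main(void) {
    uint32_t r0 = 0, r1 = 0;
    uint32_t neg_r1 = 0u - r1;                               /* rsbs r1, r1, 0 */
    unsigned carry_cmp = add_with_carry_out(r0, ~neg_r1, 1); /* cmp r0, r1     */
    unsigned carry_cmn = add_with_carry_out(r0, r1, 0);      /* cmn r0, r1     */
    printf("cmp carry=%u, cmn carry=%u\n", carry_cmp, carry_cmn); /* 1 vs 0 */
    return 0;
}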
@@ -2360,13 +3084,14 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp",
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
- BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
- BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
-defm CMPz : AI1_cmp_irs<0b1010, "cmp",
- BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
@@ -2381,13 +3106,10 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
def BCCi64 : PseudoInst<(outs),
(ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
IIC_Br,
- "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
def BCCZi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
- IIC_Br,
- "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
} // usesCustomInserter
@@ -2395,29 +3117,87 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+// FIXME: These should all be pseudo-instructions that get expanded to
+// the normal MOV instructions. That would fix the dependency on
+// special casing them in tblgen.
let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
- IIC_iCMOVr, "mov", "\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{11-4} = 0b00000000;
+def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
+ IIC_iCMOVr, "mov", "\t$Rd, $Rm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{25} = 0;
+ let Inst{20} = 0;
+ let Inst{15-12} = Rd;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rm;
}
-def MOVCCs : AI1<0b1101, (outs GPR:$dst),
- (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
- "mov", "\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP {
+def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
+ (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
+ "mov", "\t$Rd, $shift",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
}
-def MOVCCi : AI1<0b1101, (outs GPR:$dst),
- (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
- "mov", "\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP {
+let isMoveImm = 1 in
+def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+ UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = imm{15-12};
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+}
+
+let isMoveImm = 1 in
+def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+ "mov", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
+}
+
+// Two instruction predicate mov immediate.
+let isMoveImm = 1 in
+def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+ "mvn", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
}
} // neverHasSideEffects
@@ -2425,64 +3205,41 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// Atomic operations intrinsics
//
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+ let ParserMatchClass = MemBarrierOptOperand;
+}
+
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
- [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
let Inst{31-4} = 0xf57ff05;
- // FIXME: add support for options other than a full system DMB
- // See DMB disassembly-only variants below.
- let Inst{3-0} = 0b1111;
-}
-
-def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
- [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
- let Inst{31-4} = 0xf57ff04;
- // FIXME: add support for options other than a full system DSB
- // See DSB disassembly-only variants below.
- let Inst{3-0} = 0b1111;
+ let Inst{3-0} = opt;
}
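As a sanity check on the DMB encoding above: with Inst{31-4} = 0xf57ff05 and the option in Inst{3-0}, the full-system barrier "dmb sy" (opt = 0b1111) assembles to 0xf57ff05f. A small sketch (hypothetical helper name, not an LLVM API):

# Assemble a DMB word from the memb_opt field, per the record above:
# Inst{31-4} = 0xf57ff05, Inst{3-0} = opt.
def encode_dmb(opt):
    assert 0 <= opt <= 0xF
    return (0xF57FF05 << 4) | opt

assert encode_dmb(0b1111) == 0xF57FF05F   # "dmb sy"
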
def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
[(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
- // FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-
-def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
- "mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierMCR GPR:$zero)]>,
- Requires<[IsARM, HasV6]> {
- // FIXME: add support for options other than a full system DSB
- // FIXME: add encoding
-}
-}
-
-// Memory Barrier Operations Variants -- for disassembly only
-
-def memb_opt : Operand<i32> {
- let PrintMethod = "printMemBOption";
}
-class AMBI<bits<4> op7_4, string opc>
- : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasDB]> {
- let Inst{31-8} = 0xf57ff0;
- let Inst{7-4} = op7_4;
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf57ff04;
+ let Inst{3-0} = opt;
}
-// These DMB variants are for disassembly only.
-def DMBvar : AMBI<0b0101, "dmb">;
-
-// These DSB variants are for disassembly only.
-def DSBvar : AMBI<0b0100, "dsb">;
-
// ISB has only the full-system option -- for disassembly only
-def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
- Requires<[IsARM, HasDB]> {
+def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
@@ -2491,138 +3248,114 @@ let usesCustomInserter = 1 in {
let Uses = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
[(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
def ATOMIC_SWAP_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
[(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
def ATOMIC_SWAP_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
[(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
[(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
[(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
[(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
}
}
let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
- "ldrexb", "\t$dest, [$ptr]",
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrexb", "\t$Rt, [$Rn]",
[]>;
-def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
- "ldrexh", "\t$dest, [$ptr]",
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrexh", "\t$Rt, [$Rn]",
[]>;
-def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
- "ldrex", "\t$dest, [$ptr]",
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+ "ldrex", "\t$Rt, [$Rn]",
[]>;
-def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
NoItinerary,
- "ldrexd", "\t$dest, $dest2, [$ptr]",
+ "ldrexd", "\t$Rt, $Rt2, [$Rn]",
[]>;
}
-let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
NoItinerary,
- "strexb", "\t$success, $src, [$ptr]",
+ "strexb", "\t$Rd, $src, [$Rn]",
[]>;
-def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
NoItinerary,
- "strexh", "\t$success, $src, [$ptr]",
+ "strexh", "\t$Rd, $Rt, [$Rn]",
[]>;
-def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
NoItinerary,
- "strex", "\t$success, $src, [$ptr]",
+ "strex", "\t$Rd, $Rt, [$Rn]",
[]>;
-def STREXD : AIstrex<0b01, (outs GPR:$success),
- (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
NoItinerary,
- "strexd", "\t$success, $src, $src2, [$ptr]",
+ "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
[]>;
}
@@ -2630,29 +3363,15 @@ def STREXD : AIstrex<0b01, (outs GPR:$success),
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
- let Inst{7-4} = 0b0001;
+ let Inst{31-0} = 0b11110101011111111111000000011111;
}
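A quick check that the 32-bit binary literal above is the expected fixed CLREX encoding, 0xF57FF01F:

# The CLREX record fixes all 32 bits of the instruction word.
assert 0b11110101011111111111000000011111 == 0xF57FF01F
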
// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
let mayLoad = 1 in {
-def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
- "swp", "\t$dst, $src, [$ptr]",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-23} = 0b00010;
- let Inst{22} = 0; // B = 0
- let Inst{21-20} = 0b00;
- let Inst{7-4} = 0b1001;
-}
-
-def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
- "swpb", "\t$dst, $src, [$ptr]",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-23} = 0b00010;
- let Inst{22} = 1; // B = 1
- let Inst{21-20} = 0b00;
- let Inst{7-4} = 0b1001;
-}
+def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+ [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+ [/* For disassembly only; pattern left blank */]>;
}
//===----------------------------------------------------------------------===//
@@ -2660,10 +3379,11 @@ def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
//
// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
let isCall = 1,
- Defs = [R0, R12, LR, CPSR] in {
- def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br,
- "bl\t__aeabi_read_tp",
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
[(set R0, ARMthread_pointer)]>;
}
@@ -2680,19 +3400,16 @@ let isCall = 1,
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
- AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
@@ -2700,14 +3417,8 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
- AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
@@ -2715,53 +3426,58 @@ let Defs =
// FIXME: Non-Darwin version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
Defs = [ R7, LR, SP ] in {
-def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
- AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "ldr\tsp, [$src, #8]\n\t"
- "ldr\t$scratch, [$src, #4]\n\t"
- "ldr\tr7, [$src]\n\t"
- "bx\t$scratch", "",
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsARM, IsDarwin]>;
}
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
// Large immediate handling.
-// Two piece so_imms.
-let isReMaterializable = 1 in
-def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
- Pseudo, IIC_iMOVi,
- "mov", "\t$dst, $src",
- [(set GPR:$dst, so_imm2part:$src)]>,
- Requires<[IsARM, NoV6T2]>;
-
-def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
- (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
- (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS),
- (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
- (so_imm2part_2 imm:$RHS))>;
-def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
- (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)),
- (so_neg_imm2part_2 imm:$RHS))>;
-
-// 32-bit immediate using movw + movt.
+// 32-bit immediate using two piece so_imms or movw + movt.
// This is a single pseudo instruction, the benefit is that it can be remat'd
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1 in
-def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>,
- Requires<[IsARM, HasV6T2]>;
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
+
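The pseudo above covers both lowering strategies named in the comment that follows: a movw/movt pair on v6T2+ targets, or two shifter-operand immediates otherwise. A hedged Python sketch of the two checks involved (illustrative helper names, not LLVM APIs): the movw/movt path splits the value into low and high halfwords, while the two-piece path needs each piece to be an ARM modified immediate, i.e. an 8-bit value rotated right by an even amount.

def lo16_hi16(imm32):
    # movw takes the low halfword, movt the high halfword.
    return imm32 & 0xFFFF, (imm32 >> 16) & 0xFFFF

def is_so_imm(imm32):
    # ARM modified immediate: 8-bit value rotated right by an even amount.
    for rot in range(0, 32, 2):
        if ((imm32 << rot) | (imm32 >> (32 - rot))) & 0xFFFFFFFF <= 0xFF:
            return True
    return False

assert lo16_hi16(0x12345678) == (0x5678, 0x1234)
assert is_so_imm(0xFF00) and not is_so_imm(0x12345678)
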
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
@@ -2800,11 +3516,15 @@ def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
Requires<[IsARM, IsDarwin]>;
// zextload i1 -> zextload i8
-def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
// extload -> zextload
-def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
-def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
@@ -2889,19 +3609,45 @@ include "ARMInstrNEON.td"
// Coprocessor Instructions. For disassembly only.
//
-def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{4} = 0;
-}
-
-def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2",
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
- let Inst{4} = 0;
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
}
class ACI<dag oops, dag iops, string opc, string asm>
@@ -3000,110 +3746,164 @@ defm LDC2 : LdStCop<0b1111, 1, "ldc2">;
defm STC : LdStCop<{?,?,?,?}, 0, "stc">;
defm STC2 : LdStCop<0b1111, 0, "stc2">;
-def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{20} = 0;
- let Inst{4} = 1;
-}
-
-def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{20} = 0;
- let Inst{4} = 1;
-}
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
-def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{20} = 1;
+class MovRCopro<string opc, bit direction>
+ : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{20} = direction;
let Inst{4} = 1;
-}
-def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
- GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
- NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
- [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
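The only difference between MCR and MRC in this class is the direction bit at Inst{20} (0 = ARM core register to coprocessor, 1 = coprocessor to ARM core register); the rest is the shared field packing shown above. A minimal sketch of just those fields (bits owned by the ABI base class are left as zero here; helper name is illustrative):

# Pack the MovRCopro fields listed in the record above.
def mov_r_copro_fields(direction, cop, opc1, rt, crn, crm, opc2):
    word = 0
    word |= (direction & 0x1) << 20   # Inst{20}: mcr = 0, mrc = 1
    word |= 1 << 4                    # Inst{4} = 1
    word |= (opc1 & 0x7) << 21        # Inst{23-21}
    word |= (crn & 0xF) << 16         # Inst{19-16}
    word |= (rt & 0xF) << 12          # Inst{15-12}
    word |= (cop & 0xF) << 8          # Inst{11-8}
    word |= (opc2 & 0x7) << 5         # Inst{7-5}
    word |= (crm & 0xF)               # Inst{3-0}
    return word
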
+
+class MovRCopro2<string opc, bit direction>
+ : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
- let Inst{20} = 1;
+ let Inst{20} = direction;
let Inst{4} = 1;
-}
-def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
- GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
- NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0100;
-}
-
-def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
- GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
- NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
- [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro<string opc, bit direction>
+ : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+class MovRRCopro2<string opc, bit direction>
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
- let Inst{23-20} = 0b0100;
-}
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
-def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
- GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
- NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0101;
-}
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
-def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc,
- GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm),
- NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-28} = 0b1111;
- let Inst{23-20} = 0b0101;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
}
+def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>;
+def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>;
+
//===----------------------------------------------------------------------===//
// Move between special register and ARM core register -- for disassembly only
//
-def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr",
+// Move to ARM core register from Special Register
+def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
[/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0000;
+ bits<4> Rd;
+ let Inst{23-16} = 0b00001111;
+ let Inst{15-12} = Rd;
let Inst{7-4} = 0b0000;
}
-def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr",
+def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr",
[/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0100;
+ bits<4> Rd;
+ let Inst{23-16} = 0b01001111;
+ let Inst{15-12} = Rd;
let Inst{7-4} = 0b0000;
}
-def MSR : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
- "msr", "\tcpsr$mask, $src",
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions; the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R bit) in bit 4, and bits 3-0 contain
+// the mask of the fields to be accessed in the special register.
+def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary,
+ "msr", "\t$mask, $Rn",
[/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0010;
- let Inst{7-4} = 0b0000;
-}
+ bits<5> mask;
+ bits<4> Rn;
-def MSRi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
- "msr", "\tcpsr$mask, $a",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0010;
- let Inst{7-4} = 0b0000;
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-4} = 0b00000000;
+ let Inst{3-0} = Rn;
}
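A small sketch of how the 5-bit msr_mask operand lands in the word, per the assignments above (R bit from mask{4} into Inst{22}, field mask mask{3-0} into Inst{19-16}); bits supplied by the ABI base class are not modeled, and the helper name is illustrative only:

# Place the MSR mask and Rn fields as the record above specifies.
def msr_fields(mask5, rn):
    word = 0
    word |= ((mask5 >> 4) & 0x1) << 22   # R bit: CPSR vs. SPSR
    word |= 0b10 << 20                   # Inst{21-20}
    word |= (mask5 & 0xF) << 16          # field mask (which PSR fields to write)
    word |= 0b1111 << 12                 # Inst{15-12}
    word |= rn & 0xF                     # Inst{3-0}
    return word
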
-def MSRsys : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
- "msr", "\tspsr$mask, $src",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0110;
- let Inst{7-4} = 0b0000;
-}
+def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
+ "msr", "\t$mask, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<12> a;
-def MSRsysi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
- "msr", "\tspsr$mask, $a",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0110;
- let Inst{7-4} = 0b0000;
+ let Inst{23} = 0;
+ let Inst{22} = mask{4}; // R bit
+ let Inst{21-20} = 0b10;
+ let Inst{19-16} = mask{3-0};
+ let Inst{15-12} = 0b1111;
+ let Inst{11-0} = a;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 4d2f116..1e2e550 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -16,11 +16,17 @@
//===----------------------------------------------------------------------===//
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
@@ -69,6 +75,11 @@ def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@@ -129,830 +140,1506 @@ def nModImm : Operand<i32> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-// Use vldmia to load a Q register as a D register pair.
-// This is equivalent to VLDMD except that it has a Q register operand
-// instead of a pair of D registers.
-def VLDMQ
- : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
- IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
- [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-// Use vld1 to load a Q register as a D register pair.
-// This alternative to VLDMQ allows an alignment to be specified.
-// This is equivalent to VLD1q64 except that it has a Q register operand.
-def VLD1q
- : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
- IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-} // mayLoad = 1, neverHasSideEffects = 1
-
-// Use vstmia to store a Q register as a D register pair.
-// This is equivalent to VSTMD except that it has a Q register operand
-// instead of a pair of D registers.
-def VSTMQ
- : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
- IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
- [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-// Use vst1 to store a Q register as a D register pair.
-// This alternative to VSTMQ allows an alignment to be specified.
-// This is equivalent to VST1q64 except that it has a Q register operand.
-def VST1q
- : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
- IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-} // mayStore = 1, neverHasSideEffects = 1
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+def VLDMQDB
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+def VSTMQDB
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
-class VLDQPseudo
- : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
-class VLDQWBPseudo
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
-class VLDQQPseudo
- : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
-class VLDQQWBPseudo
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
-class VLDQQQQWBPseudo
+class VLDQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb, $src = $dst">;
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn), IIC_VLD1,
+ "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
class VLD1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD1x2,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD1d8 : VLD1D<0b0000, "8">;
-def VLD1d16 : VLD1D<0b0100, "16">;
-def VLD1d32 : VLD1D<0b1000, "32">;
-def VLD1d64 : VLD1D<0b1100, "64">;
+def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
-def VLD1q8 : VLD1Q<0b0000, "8">;
-def VLD1q16 : VLD1Q<0b0100, "16">;
-def VLD1q32 : VLD1Q<0b1000, "32">;
-def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
-def VLD1q8Pseudo : VLDQPseudo;
-def VLD1q16Pseudo : VLDQPseudo;
-def VLD1q32Pseudo : VLDQPseudo;
-def VLD1q64Pseudo : VLDQPseudo;
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", Dt, "\\{$dst\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
+ "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
class VLD1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", Dt, "${dst:dregpair}, $addr$offset",
- "$addr.addr = $wb", []>;
-
-def VLD1d8_UPD : VLD1DWB<0b0000, "8">;
-def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
-def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
-def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
-
-def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
-def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
-def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
-def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
-
-def VLD1q8Pseudo_UPD : VLDQWBPseudo;
-def VLD1q16Pseudo_UPD : VLDQWBPseudo;
-def VLD1q32Pseudo_UPD : VLDQWBPseudo;
-def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
+def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
+def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
+def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
+
+def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
+def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
+def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
+def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
+
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
class VLD1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
+ : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VLD1d8T : VLD1D3<0b0000, "8">;
-def VLD1d16T : VLD1D3<0b0100, "16">;
-def VLD1d32T : VLD1D3<0b1000, "32">;
-def VLD1d64T : VLD1D3<0b1100, "64">;
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
-def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
-def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
-def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
-def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
+def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
+def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
+def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
-def VLD1d64TPseudo : VLDQQPseudo;
-def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
- []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
+ "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
+ []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD1d8Q : VLD1D4<0b0000, "8">;
-def VLD1d16Q : VLD1D4<0b0100, "16">;
-def VLD1d32Q : VLD1D4<0b1000, "32">;
-def VLD1d64Q : VLD1D4<0b1100, "64">;
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
-def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
+def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
+def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
+def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
-def VLD1d64QPseudo : VLDQQPseudo;
-def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD2,
- "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn), IIC_VLD2,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
class VLD2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD2,
- "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD2x2,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
-def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
-def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
+def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
+def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
+def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
-def VLD2q8 : VLD2Q<0b0000, "8">;
-def VLD2q16 : VLD2Q<0b0100, "16">;
-def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
+def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
+def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
-def VLD2d8Pseudo : VLDQPseudo;
-def VLD2d16Pseudo : VLDQPseudo;
-def VLD2d32Pseudo : VLDQPseudo;
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2q8Pseudo : VLDQQPseudo;
-def VLD2q16Pseudo : VLDQQPseudo;
-def VLD2q32Pseudo : VLDQQPseudo;
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
- "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
+ "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
class VLD2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
- "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
+ "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
+def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
+def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
+def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
-def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
-def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
-def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
+def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
+def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers (for disassembly only):
-def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
-def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
-def VLD2b32 : VLD2D<0b1001, 0b1000, "32">;
-def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
+def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
+def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
+def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
+def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
+def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD3,
- "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
-def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
-def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
+def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
+def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
-def VLD3d8Pseudo : VLDQQPseudo;
-def VLD3d16Pseudo : VLDQQPseudo;
-def VLD3d32Pseudo : VLDQQPseudo;
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
- "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
-
-def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
-def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
-def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
-
-def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
-def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
-def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
-def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
-def VLD3q32 : VLD3D<0b0101, 0b1000, "32">;
-def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
-
-def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
+ "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
+def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
+def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
+// ...with double-spaced registers:
+def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
+def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
+def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
+def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
+def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
+def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
+
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD4,
- "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn), IIC_VLD4,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
-def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
-def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
+def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
+def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
-def VLD4d8Pseudo : VLDQQPseudo;
-def VLD4d16Pseudo : VLDQQPseudo;
-def VLD4d32Pseudo : VLDQQPseudo;
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
- "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
-
-def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
-def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
-def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
-
-def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
-def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
-def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
-def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
-def VLD4q32 : VLD4D<0b0001, 0b1000, "32">;
-def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
-
-def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
+ "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
+def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
+def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
+
+// ...with double-spaced registers:
+def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
+def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
+def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
+def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
+def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
+def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
-def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
+
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// VLD1LN : Vector Load (single element to one lane)
-// FIXME: Not yet implemented.
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag LoadOp>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
+ (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
+ IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
+ "$src = $Vd",
+ [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
+ (i32 (LoadOp addrmode6:$Rn)),
+ imm:$lane))]> {
+ let Rm = 0b1111;
+}
+class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
+ let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
+ (i32 (LoadOp addrmode6:$addr)),
+ imm:$lane))];
+}
+
+def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
+def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
+def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+
+def : Pat<(vector_insert (v2f32 DPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v4f32 QPR:$src),
+ (f32 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
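Following the per-size VLD1LNd* records above, the lane index of a single-lane vld1 occupies different bit positions depending on element size: Inst{7-5} for 8-bit, Inst{7-6} for 16-bit, and Inst{7} for 32-bit elements. A small illustrative sketch of that mapping (not an LLVM API):

# Position the lane index per the VLD1LNd8/16/32 records above.
def vld1_lane_bits(elem_size, lane):
    if elem_size == 8:
        assert 0 <= lane < 8
        return (lane & 0x7) << 5    # Inst{7-5} = lane{2-0}
    if elem_size == 16:
        assert 0 <= lane < 4
        return (lane & 0x3) << 6    # Inst{7-6} = lane{1-0}
    if elem_size == 32:
        assert 0 <= lane < 2
        return (lane & 0x1) << 7    # Inst{7} = lane{0}
    raise ValueError("unsupported element size")
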
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$src = $Vd, $Rn.addr = $wb", []>;
+
+def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{4};
+ let Inst{4} = Rn{4};
+}
+
+def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
+def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
// VLD2LN : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2", []>;
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
// ...with double-spaced registers:
-def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
- "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
- "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+ "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
- "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+ : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+ let Rm = 0b1111;
+}
-def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
// ...with double-spaced registers:
-def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
- IIC_VLD3, "vld3", Dt,
- "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
+ IIC_VLD3lnu, "vld3", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
[]>;
-def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
- "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
- "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+ "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+ "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
// ...with double-spaced registers:
-def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, op11_8, op7_4,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
+ : NLdStLn<1, 0b10, op11_8, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
- IIC_VLD4, "vld4", Dt,
-"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
-"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
- []>;
+ IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+ []> {
+ let Inst{4} = Rn{4};
+}
-def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
-def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+ [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+ let Pattern = [(set QPR:$dst,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6dup:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
-// FIXME: Not yet implemented.
+class VLD4DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6dup:$Rn), IIC_VLD4dup,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1111, op7_4,
+ (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+ (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+ "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
-class VSTQPseudo
- : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
-class VSTQWBPseudo
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
-class VSTQQPseudo
- : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
-class VSTQQWBPseudo
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
-class VSTQQQQWBPseudo
+class VSTQQQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
"$addr.addr = $wb">;
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+ IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+ "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VST1d8 : VST1D<0b0000, "8">;
-def VST1d16 : VST1D<0b0100, "16">;
-def VST1d32 : VST1D<0b1000, "32">;
-def VST1d64 : VST1D<0b1100, "64">;
+def VST1d8 : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
-def VST1q8 : VST1Q<0b0000, "8">;
-def VST1q16 : VST1Q<0b0100, "16">;
-def VST1q32 : VST1Q<0b1000, "32">;
-def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8 : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
-def VST1q8Pseudo : VSTQPseudo;
-def VST1q16Pseudo : VSTQPseudo;
-def VST1q32Pseudo : VSTQPseudo;
-def VST1q64Pseudo : VSTQPseudo;
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
- "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+ "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
class VST1QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
- "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VST1d8_UPD : VST1DWB<0b0000, "8">;
-def VST1d16_UPD : VST1DWB<0b0100, "16">;
-def VST1d32_UPD : VST1DWB<0b1000, "32">;
-def VST1d64_UPD : VST1DWB<0b1100, "64">;
+def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
-def VST1q8_UPD : VST1QWB<0b0000, "8">;
-def VST1q16_UPD : VST1QWB<0b0100, "16">;
-def VST1q32_UPD : VST1QWB<0b1000, "32">;
-def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
-def VST1q8Pseudo_UPD : VSTQWBPseudo;
-def VST1q16Pseudo_UPD : VSTQWBPseudo;
-def VST1q32Pseudo_UPD : VSTQWBPseudo;
-def VST1q64Pseudo_UPD : VSTQWBPseudo;
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3),
+ IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VST1d8T : VST1D3<0b0000, "8">;
-def VST1d16T : VST1D3<0b0100, "16">;
-def VST1d32T : VST1D3<0b1000, "32">;
-def VST1d64T : VST1D3<0b1100, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
-def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
-def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
-def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
- []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VST1d8Q : VST1D4<0b0000, "8">;
-def VST1d16Q : VST1D4<0b0100, "16">;
-def VST1d32Q : VST1D4<0b1000, "32">;
-def VST1d64Q : VST1D4<0b1100, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<0b0000, "8">;
-def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
-def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
-def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+ IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
class VST2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
- "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VST2d8 : VST2D<0b1000, 0b0000, "8">;
-def VST2d16 : VST2D<0b1000, 0b0100, "16">;
-def VST2d32 : VST2D<0b1000, 0b1000, "32">;
+def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
-def VST2q8 : VST2Q<0b0000, "8">;
-def VST2q16 : VST2Q<0b0100, "16">;
-def VST2q32 : VST2Q<0b1000, "32">;
+def VST2q8 : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
-def VST2d8Pseudo : VSTQPseudo;
-def VST2d16Pseudo : VSTQPseudo;
-def VST2d32Pseudo : VSTQPseudo;
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2q8Pseudo : VSTQQPseudo;
-def VST2q16Pseudo : VSTQQPseudo;
-def VST2q32Pseudo : VSTQQPseudo;
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+ IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
class VST2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
-def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
+def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
-def VST2q8_UPD : VST2QWB<0b0000, "8">;
-def VST2q16_UPD : VST2QWB<0b0100, "16">;
-def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo;
-def VST2d16Pseudo_UPD : VSTQWBPseudo;
-def VST2d32Pseudo_UPD : VSTQWBPseudo;
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers (for disassembly only):
-def VST2b8 : VST2D<0b1001, 0b0000, "8">;
-def VST2b16 : VST2D<0b1001, 0b0100, "16">;
-def VST2b32 : VST2D<0b1001, 0b1000, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
+def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VST3d8 : VST3D<0b0100, 0b0000, "8">;
-def VST3d16 : VST3D<0b0100, 0b0100, "16">;
-def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
-def VST3d8Pseudo : VSTQQPseudo;
-def VST3d16Pseudo : VSTQQPseudo;
-def VST3d32Pseudo : VSTQQPseudo;
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
-
-def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
-def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
-def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
-
-def VST3d8Pseudo_UPD : VSTQQWBPseudo;
-def VST3d16Pseudo_UPD : VSTQQWBPseudo;
-def VST3d32Pseudo_UPD : VSTQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VST3q8 : VST3D<0b0101, 0b0000, "8">;
-def VST3q16 : VST3D<0b0101, 0b0100, "16">;
-def VST3q32 : VST3D<0b0101, 0b1000, "32">;
-def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
-
-def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
-def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
-def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+ "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// ...alternate versions to be allocated odd register numbers:
-def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
- "", []>;
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{5-4} = Rn{5-4};
+}
-def VST4d8 : VST4D<0b0000, 0b0000, "8">;
-def VST4d16 : VST4D<0b0000, 0b0100, "16">;
-def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
-def VST4d8Pseudo : VSTQQPseudo;
-def VST4d16Pseudo : VSTQQPseudo;
-def VST4d32Pseudo : VSTQQPseudo;
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
- "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
-
-def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
-def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
-def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
-
-def VST4d8Pseudo_UPD : VSTQQWBPseudo;
-def VST4d16Pseudo_UPD : VSTQQWBPseudo;
-def VST4d32Pseudo_UPD : VSTQQWBPseudo;
-
-// ...with double-spaced registers (non-updating versions for disassembly only):
-def VST4q8 : VST4D<0b0001, 0b0000, "8">;
-def VST4q16 : VST4D<0b0001, 0b0100, "16">;
-def VST4q32 : VST4D<0b0001, 0b1000, "32">;
-def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
-
-def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
-def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
-def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+ "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
// ...alternate versions to be allocated odd register numbers:
-def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
-def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
-// FIXME: Not yet implemented.
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+ IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+ let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNPseudo<IIC_VST1ln> {
+ let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+ NEONvgetlaneu> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+ NEONvgetlaneu> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+ "\\{$Vd[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
- "", []>;
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+ IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
-def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
// ...with double-spaced registers:
-def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+ let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{4} = Rn{4};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ "$addr.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST, "vst3", Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+}
+
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
// ...with double-spaced registers:
-def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
- IIC_VST, "vst3", Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+ IIC_VST3lnu, "vst3", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+ let Inst{7} = lane{0};
+}
-def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST, "vst4", Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
- "", []>;
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+ (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+ "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
-def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
// ...with double-spaced registers:
-def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
- IIC_VST, "vst4", Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []>;
+ : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+ IIC_VST4lnu, "vst4", Dt,
+ "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
-def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+ let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+ let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+ let Inst{7} = lane{0};
+ let Inst{5} = Rn{5};
+}
-def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
@@ -1000,98 +1687,92 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
// Instruction Classes
//===----------------------------------------------------------------------===//
-// Basic 2-register operations: single-, double- and quad-register.
-class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
- string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
+// Basic 2-register operations: double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
- (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyD, ValueType TyQ, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
- (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+ (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
// Long 2-register operations (currently only used for VMOVL).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
- (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+ (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
- : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
- (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
- OpcodeStr, Dt, "$dst1, $dst2",
- "$src1 = $dst1, $src2 = $dst2", []>;
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+ (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+ OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
InstrItinClass itin, string OpcodeStr, string Dt>
- : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
- (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
- "$src1 = $dst1, $src2 = $dst2", []>;
-
-// Basic 3-register operations: single-, double- and quad-register.
-class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
- IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
- let isCommutable = Commutable;
-}
+ : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+ "$src1 = $Vd, $src2 = $Vm", []>;
+// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
let isCommutable = Commutable;
}
// Same as N3VD but no data type.
@@ -1100,31 +1781,31 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
let isCommutable = Commutable;
}
-class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (Ty DPR:$dst),
- (Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
let isCommutable = 0;
}
-class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
- [(set (Ty DPR:$dst),
- (Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (Ty DPR:$Vd),
+ (Ty (ShOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -1132,40 +1813,40 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
let isCommutable = Commutable;
}
-class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (ResTy QPR:$dst),
- (ResTy (ShOp (ResTy QPR:$src1),
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
- [(set (ResTy QPR:$dst),
- (ResTy (ShOp (ResTy QPR:$src1),
- (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (ShOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -1175,30 +1856,39 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
let isCommutable = Commutable;
}
-class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (Ty DPR:$dst),
- (Ty (IntOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (Ty DPR:$dst),
- (Ty (IntOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (Ty DPR:$Vd),
+ (Ty (IntOp (Ty DPR:$Vn),
+ (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
let isCommutable = 0;
}
@@ -1206,20 +1896,20 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
let isCommutable = Commutable;
}
-class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (ResTy QPR:$dst),
- (ResTy (IntOp (ResTy QPR:$src1),
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -1227,93 +1917,95 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (ResTy QPR:$dst),
- (ResTy (IntOp (ResTy QPR:$src1),
- (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (ResTy QPR:$Vn),
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
-// Multiply-Add/Sub operations: single-, double- and quad-register.
-class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode OpNode>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
-
+// Multiply-Add/Sub operations: double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode OpNode>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst, (Ty (OpNode DPR:$src1,
- (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode ShOp>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (Ty DPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (MulOp DPR:$src2,
- (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (Ty DPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (MulOp DPR:$src2,
- (Ty (NEONvduplane (Ty DPR_8:$src3),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
- SDNode MulOp, SDNode OpNode>
+ SDPatternOperator MulOp, SDPatternOperator OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (Ty (OpNode QPR:$src1,
- (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDNode MulOp, SDNode ShOp>
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (ResTy QPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
- (ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (ResTy QPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
- (ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+ (ResTy (MulOp QPR:$Vn,
+ (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))))]>;
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
@@ -1321,18 +2013,18 @@ class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, Intrinsic IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst, (Ty (OpNode DPR:$src1,
- (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, Intrinsic IntOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (Ty (OpNode QPR:$src1,
- (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
@@ -1340,52 +2032,52 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
- (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+ (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
- (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+ (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Long Multiply-Add/Sub operations.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
- (TyQ (MulOp (TyD DPR:$src2),
- (TyD DPR:$src3)))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set QPR:$dst,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
- (TyQ (MulOp (TyD DPR:$src2),
- (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set QPR:$dst,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
- (TyQ (MulOp (TyD DPR:$src2),
- (TyD (NEONvduplane (TyD DPR_8:$src3),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm),
imm:$lane))))))]>;
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
@@ -1394,11 +2086,11 @@ class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
- (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
- (TyD DPR:$src3)))))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
@@ -1406,35 +2098,35 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (ResTy QPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
- (OpTy DPR:$src2),
- (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ (outs QPR:$Vd),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (ResTy QPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
- (OpTy DPR:$src2),
- (OpTy (NEONvduplane (OpTy DPR_8:$src3),
+ (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Narrowing 3-register intrinsics.
@@ -1442,9 +2134,9 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
+ (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
let isCommutable = Commutable;
}
@@ -1453,29 +2145,29 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set QPR:$dst,
- (TyQ (OpNode (TyD DPR:$src1),
- (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set QPR:$dst,
- (TyQ (OpNode (TyD DPR:$src1),
- (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set QPR:$Vd,
+ (TyQ (OpNode (TyD DPR:$Vn),
+ (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1483,10 +2175,10 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
- (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
let isCommutable = Commutable;
}
@@ -1496,10 +2188,10 @@ class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
- (TyD DPR:$src2))))))]> {
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm))))))]> {
let isCommutable = Commutable;
}
@@ -1508,30 +2200,30 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (ResTy QPR:$dst),
- (ResTy (IntOp (OpTy DPR:$src1),
- (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
- [(set (ResTy QPR:$dst),
- (ResTy (IntOp (OpTy DPR:$src1),
- (OpTy (NEONvduplane (OpTy DPR_8:$src2),
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ [(set (ResTy QPR:$Vd),
+ (ResTy (IntOp (OpTy DPR:$Vn),
+ (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Wide 3-register operations.
@@ -1539,10 +2231,10 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
- (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
+ (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
let isCommutable = Commutable;
}
@@ -1551,16 +2243,16 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
@@ -1570,17 +2262,17 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
- OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
- [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
- OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
- [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
// Shift by immediate,
// both double- and quad-register.
@@ -1588,25 +2280,25 @@ class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
+ (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
+ (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
- IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
+ (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
// Narrow shift by immediate.
@@ -1614,42 +2306,42 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
+ (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
(i32 imm:$SIMM))))]>;
// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
- : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
- OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
- [(set DPR:$dst, (Ty (add DPR:$src1,
- (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (add DPR:$src1,
+ (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
- : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
- OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
- [(set QPR:$dst, (Ty (add QPR:$src1,
- (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (add QPR:$src1,
+ (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
- : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
- OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
- [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
+ : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
- : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
- OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
- [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
+ : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
+ OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
// Convert, with fractional bits immediate,
// both double- and quad-register.
@@ -1657,16 +2349,16 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
- IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
+ (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
- IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
+ (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
//===----------------------------------------------------------------------===//
// Multiclasses
@@ -1678,45 +2370,127 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// S = single int (32 bit) elements
// D = double int (64 bit) elements
-// Neon 2-register vector operations -- for disassembly only.
+// Neon 2-register vector operations and intrinsics.
-// First with only element sizes of 8, 16 and 32 bits:
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
- string asm> {
+ string asm, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f32", asm, "",
+ [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
}
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
+ def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
+
+ // 128-bit vector types.
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
+ def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
+ def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, IntOp>;
+ def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, IntOp>;
+ def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
@@ -1726,7 +2500,7 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt,
SDNode OpNode, bit Commutable = 0> {
// 64-bit vector types.
- def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
+ def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
@@ -1775,54 +2549,6 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
-// Neon Narrowing 2-register vector operations,
-// source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
- bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
- def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "16"),
- v8i8, v8i16, OpNode>;
- def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "32"),
- v4i16, v4i32, OpNode>;
- def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "64"),
- v2i32, v2i64, OpNode>;
-}
-
-// Neon Narrowing 2-register vector intrinsics,
-// source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
- bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- Intrinsic IntOp> {
- def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "16"),
- v8i8, v8i16, IntOp>;
- def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "32"),
- v4i16, v4i32, IntOp>;
- def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
- itin, OpcodeStr, !strconcat(Dt, "64"),
- v2i32, v2i64, IntOp>;
-}
-
-
-// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
-// source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, SDNode OpNode> {
- def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
- def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
- def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
-}
-
-
// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1847,8 +2573,29 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
-multiclass N3VIntSL_HS<bits<4> op11_8,
+multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, Intrinsic IntOp> {
@@ -1877,6 +2624,21 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
@@ -1893,6 +2655,20 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp, Commutable>;
}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
@@ -1920,7 +2696,7 @@ multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, OpNode, Commutable>;
- def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, OpNode, Commutable>;
def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1944,7 +2720,7 @@ multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, OpNode, ExtOp, Commutable>;
- def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, OpNode, ExtOp, Commutable>;
def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1959,7 +2735,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin16, InstrItinClass itin32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
- def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
+ def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, Commutable>;
def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
@@ -1970,7 +2746,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
Intrinsic IntOp> {
- def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
+ def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
@@ -1995,7 +2771,7 @@ multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "8"),
v8i16, v8i8, IntOp, ExtOp, Commutable>;
- def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
OpcodeStr, !strconcat(Dt, "16"),
v4i32, v4i16, IntOp, ExtOp, Commutable>;
def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
@@ -2044,7 +2820,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}
-multiclass N3VMulOpSL_HS<bits<4> op11_8,
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDNode ShOp> {
@@ -2174,30 +2950,6 @@ multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
-// Neon 2-register vector intrinsics,
-// element sizes of 8, 16 and 32 bits:
-multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
- bits<5> op11_7, bit op4,
- InstrItinClass itinD, InstrItinClass itinQ,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- // 64-bit vector types.
- def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
- def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
-
- // 128-bit vector types.
- def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
- def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
- def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
-}
-
-
// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -2461,9 +3213,9 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
"p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
"p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
-def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
-def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
@@ -2491,7 +3243,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
- IIC_VMULi16Q, IIC_VMULi32Q,
+ IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -2555,15 +3307,19 @@ defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
- v2f32, fmul, fadd>;
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
- v4f32, fmul, fadd>;
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
- v2f32, fmul, fadd>;
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
- v4f32, v2f32, fmul, fadd>;
+ v4f32, v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -2581,14 +3337,15 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
+def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
(v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLAslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane)))>;
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2608,15 +3365,19 @@ defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
- v2f32, fmul, fsub>;
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
- v4f32, fmul, fsub>;
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
- v2f32, fmul, fsub>;
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
- v4f32, v2f32, fmul, fsub>;
+ v4f32, v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasNEON, UseFPVMLx]>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -2634,13 +3395,14 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
+def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
+ (fmul_su (v4f32 QPR:$src2),
(v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
- (SubReg_i32_lane imm:$lane)))>;
+ (SubReg_i32_lane imm:$lane)))>,
+ Requires<[HasNEON, UseFPVMLx]>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2703,25 +3465,24 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
-// For disassembly only.
+
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$Vd, $Vm, #0", NEONvceqz>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
-defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
-// For disassembly only.
+
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$dst, $src, #0">;
-// For disassembly only.
+ "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$dst, $src, #0">;
+ "$Vd, $Vm, #0", NEONvclez>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -2732,12 +3493,11 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
-// For disassembly only.
+
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$dst, $src, #0">;
-// For disassembly only.
+ "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$dst, $src, #0">;
+ "$Vd, $Vm, #0", NEONvcltz>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
@@ -2750,7 +3510,7 @@ def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
"f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
-defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// Vector Bitwise Operations.
@@ -2779,104 +3539,190 @@ def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
+def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+
// VBIC : Vector Bitwise Bit Clear (AND NOT)
-def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
- "vbic", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (and DPR:$src1,
- (vnotd DPR:$src2))))]>;
-def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
- "vbic", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (and QPR:$src1,
- (vnotq QPR:$src2))))]>;
+def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
+
+def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
+ (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
+
+def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
+ (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ IIC_VMOVImm,
+ "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+ let Inst{10-9} = SIMM{10-9};
+}
// VORN : Vector Bitwise OR NOT
-def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
- "vorn", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (or DPR:$src1,
- (vnotd DPR:$src2))))]>;
-def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
- "vorn", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (or QPR:$src1,
- (vnotq QPR:$src2))))]>;
+def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
+ (vnotd DPR:$Vm))))]>;
+def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
+ (vnotq QPR:$Vm))))]>;
// VMVN : Vector Bitwise NOT (Immediate)
let isReMaterializable = 1 in {
-def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmvn", "i16", "$dst, $SIMM", "",
- [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
-def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmvn", "i16", "$dst, $SIMM", "",
- [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+ "vmvn", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
-def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmvn", "i32", "$dst, $SIMM", "",
- [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
-def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmvn", "i32", "$dst, $SIMM", "",
- [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+ "vmvn", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
}
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
- "vmvn", "$dst, $src", "",
- [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
- "vmvn", "$dst, $src", "",
- [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
+ "vmvn", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
-def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR:$src3),
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst,
- (v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnotd DPR:$src1)))))]>;
-def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (or (and DPR:$Vn, DPR:$src1),
+ (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnotq QPR:$src1)))))]>;
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (or (and QPR:$Vn, QPR:$src1),
+ (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
- "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
- "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding MAY NOT BE correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
- "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
- "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
@@ -2957,8 +3803,8 @@ def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
"vpadd", "i32",
v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
- IIC_VBIND, "vpadd", "f32",
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
@@ -2986,7 +3832,7 @@ def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
@@ -3002,16 +3848,16 @@ def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
-def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
IIC_VUNAD, "vrecpe", "u32",
v2i32, v2i32, int_arm_neon_vrecpe>;
-def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
+def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
IIC_VUNAQ, "vrecpe", "u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
@@ -3039,7 +3885,7 @@ def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAD, "vrsqrte", "f32",
v2f32, v2f32, int_arm_neon_vrsqrte>;
-def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
+def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
@@ -3054,12 +3900,12 @@ def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
// Vector Shifts.
// VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
- "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
- "vshl", "u", int_arm_neon_vshiftu, 0>;
+ "vshl", "u", int_arm_neon_vshiftu>;
// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
N2RegVShLFrm>;
@@ -3093,12 +3939,12 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
NEONvshrn>;
// VRSHL : Vector Rounding Shift
-defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vrshl", "s", int_arm_neon_vrshifts, 0>;
-defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vrshl", "u", int_arm_neon_vrshiftu, 0>;
+ "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
N2RegVShRFrm>;
@@ -3110,12 +3956,12 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
-defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqshl", "s", int_arm_neon_vqshifts, 0>;
-defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqshl", "u", int_arm_neon_vqshiftu, 0>;
+ "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
N2RegVShLFrm>;
@@ -3136,12 +3982,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
@@ -3168,7 +4014,7 @@ defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
// Vector Absolute and Saturating Absolute.
// VABS : Vector Absolute Value
-defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
+defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
int_arm_neon_vabs>;
def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
@@ -3179,7 +4025,7 @@ def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
v4f32, v4f32, int_arm_neon_vabs>;
// VQABS : Vector Saturating Absolute Value
-defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
+defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
int_arm_neon_vqabs>;
@@ -3191,13 +4037,13 @@ def vnegq : PatFrag<(ops node:$in),
(sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
- IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
- IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
+ : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+ IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
@@ -3209,13 +4055,13 @@ def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
- "vneg", "f32", "$dst, $src", "",
- [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
- "vneg", "f32", "$dst, $src", "",
- [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f32", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -3225,22 +4071,22 @@ def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
-defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
int_arm_neon_vqneg>;
// Vector Bit Counting Operations.
// VCLS : Vector Count Leading Sign Bits
-defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
-defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
-def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiD, "vcnt", "8",
v8i8, v8i8, int_arm_neon_vcnt>;
def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
@@ -3249,98 +4095,126 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
// Vector Swap -- for disassembly only.
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), NoItinerary,
- "vswp", "$dst, $src", "", []>;
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), NoItinerary,
- "vswp", "$dst, $src", "", []>;
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ "vswp", "$Vd, $Vm", "", []>;
// Vector Move Operations.
// VMOV : Vector Move (Register)
let neverHasSideEffects = 1 in {
-def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
-def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
+ N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+ let Vn{4-0} = Vm{4-0};
+}
+def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+ let Vn{4-0} = Vm{4-0};
+}
// Pseudo vector move instructions for QQ and QQQQ registers. This should
// be expanded after register allocation is completed.
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
- NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+ NoItinerary, []>;
def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
- NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+ NoItinerary, []>;
} // neverHasSideEffects
// VMOV : Vector Move (Immediate)
let isReMaterializable = 1 in {
-def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i8", "$dst, $SIMM", "",
- [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i8", "$dst, $SIMM", "",
- [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+ "vmov", "i8", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i16", "$dst, $SIMM", "",
- [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i16", "$dst, $SIMM", "",
- [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
+ "vmov", "i16", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
-def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i32", "$dst, $SIMM", "",
- [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i32", "$dst, $SIMM", "",
- [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
+ "vmov", "i32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
-def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
+def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i64", "$dst, $SIMM", "",
- [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nModImm:$SIMM), IIC_VMOVImm,
- "vmov", "i64", "$dst, $SIMM", "",
- [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+ "vmov", "i64", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
- (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
- [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
- imm:$lane))]>;
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
- (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
- [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
- imm:$lane))]>;
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
- (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
- [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
- imm:$lane))]>;
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
- (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
- [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
- imm:$lane))]>;
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
- (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
- [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
- imm:$lane))]>;
+ (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+ imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
(VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3376,37 +4250,45 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
// VMOV : Vector Set Lane (move ARM core register to scalar)
-let Constraints = "$src1 = $dst" in {
-def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
- [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
- GPR:$src2, imm:$lane))]>;
-def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
- [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
- GPR:$src2, imm:$lane))]>;
-def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
- (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
- [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
- GPR:$src2, imm:$lane))]>;
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{2};
+ let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{1};
+ let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+ (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+ GPR:$R, imm:$lane))]> {
+ let Inst{21} = lane{0};
+}
}
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
- (v16i8 (INSERT_SUBREG QPR:$src1,
+ (v16i8 (INSERT_SUBREG QPR:$src1,
(v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i8_reg imm:$lane))),
GPR:$src2, (SubReg_i8_lane imm:$lane))),
(DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
- (v8i16 (INSERT_SUBREG QPR:$src1,
+ (v8i16 (INSERT_SUBREG QPR:$src1,
(v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i16_reg imm:$lane))),
GPR:$src2, (SubReg_i16_lane imm:$lane))),
(DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
- (v4i32 (INSERT_SUBREG QPR:$src1,
+ (v4i32 (INSERT_SUBREG QPR:$src1,
(v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
(DSubReg_i32_reg imm:$lane))),
GPR:$src2, (SubReg_i32_lane imm:$lane))),
@@ -3454,13 +4336,13 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
// VDUP : Vector Duplicate (from ARM core register to all elements)
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
- : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", Dt, "$dst, $src",
- [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
+ : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
- : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", Dt, "$dst, $src",
- [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
+ : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", Dt, "$V, $R",
+ [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
@@ -3469,40 +4351,56 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", "32", "$dst, $src",
- [(set DPR:$dst, (v2f32 (NEONvdup
- (f32 (bitconvert GPR:$src)))))]>;
-def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "vdup", "32", "$dst, $src",
- [(set QPR:$dst, (v4f32 (NEONvdup
- (f32 (bitconvert GPR:$src)))))]>;
+def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", "32", "$V, $R",
+ [(set DPR:$V, (v2f32 (NEONvdup
+ (f32 (bitconvert GPR:$R)))))]>;
+def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
+ IIC_VMOVIS, "vdup", "32", "$V, $R",
+ [(set QPR:$V, (v4f32 (NEONvdup
+ (f32 (bitconvert GPR:$R)))))]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType Ty>
- : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
- [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
+ : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy>
- : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
- [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
+ : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane),
+ IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]",
+ [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
imm:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
-def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
-def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
-def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
+ let Inst{19-17} = lane{2-0};
+}
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
+ let Inst{19-18} = lane{1-0};
+}
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
+ let Inst{19} = lane{0};
+}
+def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
+ let Inst{19} = lane{0};
+}
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3521,18 +4419,13 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
- (outs DPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
-
-def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
- (outs QPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
"vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
@@ -3585,20 +4478,30 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+ IIC_VUNAQ, "vcvt", "f16.f32",
+ v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+ Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+ IIC_VUNAQ, "vcvt", "f32.f16",
+ v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+ Requires<[HasNEON, HasFP16]>;
+
// Vector Reverse.
// VREV64 : Vector Reverse elements within 64-bit doublewords
class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
- (ins DPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
@@ -3613,15 +4516,15 @@ def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
// VREV32 : Vector Reverse elements within 32-bit words
class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
- (ins DPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
@@ -3632,46 +4535,91 @@ def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
// VREV16 : Vector Reverse elements within 16-bit halfwords
class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
- (ins DPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+ (ins DPR:$Vm), IIC_VMOVD,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
+ : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+ (ins QPR:$Vm), IIC_VMOVQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "",
+ [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
// Other Vector Shuffles.
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+ : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+
// VEXT : Vector Extract
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
- : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
- (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
- IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
- [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
- (Ty DPR:$rhs), imm:$index)))]>;
+ : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+ (Ty DPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
- : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
- (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
- IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
- [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
- (Ty QPR:$rhs), imm:$index)))]>;
-
-def VEXTd8 : VEXTd<"vext", "8", v8i8>;
-def VEXTd16 : VEXTd<"vext", "16", v4i16>;
-def VEXTd32 : VEXTd<"vext", "32", v2i32>;
-def VEXTdf : VEXTd<"vext", "32", v2f32>;
-
-def VEXTq8 : VEXTq<"vext", "8", v16i8>;
-def VEXTq16 : VEXTq<"vext", "16", v8i16>;
-def VEXTq32 : VEXTq<"vext", "32", v4i32>;
-def VEXTqf : VEXTq<"vext", "32", v4f32>;
+ : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+ (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+ [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+ (Ty QPR:$Vm), imm:$index)))]> {
+ bits<4> index;
+ let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTdf : VEXTd<"vext", "32", v2f32> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+ let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+ let Inst{11-9} = index{2-0};
+ let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
+}
+def VEXTqf : VEXTq<"vext", "32", v4f32> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
// VTRN : Vector Transpose
@@ -3707,160 +4655,120 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// VTBL : Vector Table Lookup
def VTBL1
- : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
- "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
+ : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
- : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
+ : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
def VTBL3
- : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
+ : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
def VTBL4
- : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
+ : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+ (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
+ "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
// VTBX : Vector Table Extension
def VTBX1
- : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
- "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
- DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
+ : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+ DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
- : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
+ : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
def VTBX3
- : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
+ : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
- "$orig = $dst", []>;
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
- "$orig = $dst", []>;
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
-class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
- : NEONFPPat<(ResTy (OpNode SPR:$a)),
- (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
- SPR:$a, ssub_0))),
- ssub_0)>;
+class N2VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a)),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
class N3VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
- (EXTRACT_SUBREG (v2f32
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, ssub_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, ssub_0))),
- ssub_0)>;
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
- (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$acc, ssub_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, ssub_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, ssub_0)),
- ssub_0)>;
-
-// These need separate instructions because they must use DPR_VFP2 register
-// class which have SPR sub-registers.
-
-// Vector Add Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
-def : N3VSPat<fadd, VADDfd_sfp>;
-
-// Vector Sub Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
-def : N3VSPat<fsub, VSUBfd_sfp>;
-
-// Vector Multiply Operations used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
-def : N3VSPat<fmul, VMULfd_sfp>;
-
-// Vector Multiply-Accumulate/Subtract used for single-precision FP
-// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
-// we want to avoid them for now. e.g., alternating vmla/vadd instructions.
-
-//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
-// v2f32, fmul, fadd>;
-//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
-
-//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
-// v2f32, fmul, fsub>;
-//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
-
-// Vector Absolute used for single-precision FP
-let neverHasSideEffects = 1 in
-def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vabs", "f32", "$dst, $src", "", []>;
-def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
-
-// Vector Negate used for single-precision FP
-let neverHasSideEffects = 1 in
-def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg", "f32", "$dst, $src", "", []>;
-def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
-
-// Vector Maximum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
- "vmax", "f32", "$dst, $src1, $src2", "", []>;
-def : N3VSPat<NEONfmax, VMAXfd_sfp>;
-
-// Vector Minimum used for single-precision FP
-let neverHasSideEffects = 1 in
-def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
- "vmin", "f32", "$dst, $src1, $src2", "", []>;
-def : N3VSPat<NEONfmin, VMINfd_sfp>;
-
-// Vector Convert between single-precision FP and integer
-let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
- v2i32, v2f32, fp_to_sint>;
-def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
-
-let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
- v2i32, v2f32, fp_to_uint>;
-def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
-
-let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
- v2f32, v2i32, sint_to_fp>;
-def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
-
-let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
- v2f32, v2i32, uint_to_fp>;
-def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (Inst
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$acc, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$a, ssub_0),
+ (INSERT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+ SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index a13ff12..826ef46 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,7 @@
//
def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def imm_neg_XFORM : SDNodeXForm<imm, [{
@@ -26,7 +26,6 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
-
/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
def imm0_7 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 8;
@@ -50,9 +49,9 @@ def imm8_255_neg : PatLeaf<(i32 imm), [{
return Val >= 8 && Val < 256;
}], imm_neg_XFORM>;
-// Break imm's up into two pieces: an immediate + a left shift.
-// This uses thumb_immshifted to match and thumb_immshifted_val and
-// thumb_immshifted_shamt to get the val/shift pieces.
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
def thumb_immshifted : PatLeaf<(imm), [{
return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
}]>;
@@ -67,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(V, MVT::i32);
}]>;
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+ let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
// Scaled 4 immediate.
def t_imm_s4 : Operand<i32> {
let PrintMethod = "printThumbS4ImmOperand";
@@ -74,47 +78,114 @@ def t_imm_s4 : Operand<i32> {
// Define Thumb specific addressing modes.
+def t_brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+ let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+ let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+ let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeRegThumb";
+ let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+ let Name = "MemModeImmThumb";
+ let SuperClasses = [];
+}
+
// t_addrmode_rr := reg + reg
//
def t_addrmode_rr : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-// t_addrmode_s4 := reg + reg
-// reg + imm5 * 4
+// t_addrmode_rrs := reg + reg
//
-def t_addrmode_s4 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> {
- let PrintMethod = "printThumbAddrModeS4Operand";
- let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_rrs1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+ let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
}
-// t_addrmode_s2 := reg + reg
-// reg + imm5 * 2
+// t_addrmode_is2 := reg + imm5 * 2
//
-def t_addrmode_s2 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> {
- let PrintMethod = "printThumbAddrModeS2Operand";
- let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_is2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
}
-// t_addrmode_s1 := reg + reg
-// reg + imm5
+// t_addrmode_is1 := reg + imm5
//
-def t_addrmode_s1 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> {
- let PrintMethod = "printThumbAddrModeS1Operand";
- let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+def t_addrmode_is1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+ let EncoderMethod = "getAddrModeISOpValue";
+ let PrintMethod = "printThumbAddrModeImm5S1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
}
// t_addrmode_sp := sp + imm8 * 4
//
def t_addrmode_sp : Operand<i32>,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let EncoderMethod = "getAddrModeThumbSPOpValue";
let PrintMethod = "printThumbAddrModeSPOperand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
+//
+def t_addrmode_pc : Operand<i32> {
+ let EncoderMethod = "getAddrModePCOpValue";
+ let ParserMatchClass = MemModeImmThumbAsmOperand;
}
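+// Note (assumed naming convention): t_addrmode_rrs{1,2,4} are the reg+reg forms
+// for 1/2/4-byte accesses and t_addrmode_is{1,2,4} the reg + imm5 forms scaled
+// by the access size; e.g. in the T1 encoding of "ldr r0, [r1, #8]" the imm5
+// field holds 8 / 4 = 2.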
//===----------------------------------------------------------------------===//
@@ -126,132 +197,162 @@ def t_addrmode_sp : Operand<i32>,
// these will always be in pairs, and asserts if it finds otherwise. Better way?
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
- "${:comment} tADJCALLSTACKUP $amt1",
- [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
+ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
def tADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
- "${:comment} tADJCALLSTACKDOWN $amt",
- [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
+ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
+ [(ARMcallseq_start imm:$amt)]>,
+ Requires<[IsThumb, IsThumb1Only]>;
+}
+
+// T1Disassembly - A simple class to make encoding some disassembly patterns
+// easier and less verbose.
+class T1Disassembly<bits<2> op1, bits<8> op2>
+ : T1Encoding<0b101111> {
+ let Inst{9-8} = op1;
+ let Inst{7-0} = op2;
}
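+// For illustration: with this class, tNOP below encodes as
+// 0b1011111100000000 = 0xBF00 and tSEV as 0xBF40 (the Thumb hint encodings).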
def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b11;
- let Inst{7-0} = 0b00000000;
-}
+ T1Disassembly<0b11, 0x00>; // A8.6.110
def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b11;
- let Inst{7-0} = 0b00010000;
-}
+ T1Disassembly<0b11, 0x10>; // A8.6.410
def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b11;
- let Inst{7-0} = 0b00100000;
-}
+ T1Disassembly<0b11, 0x20>; // A8.6.408
def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b11;
- let Inst{7-0} = 0b00110000;
-}
+ T1Disassembly<0b11, 0x30>; // A8.6.409
def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b11;
- let Inst{7-0} = 0b01000000;
-}
+ T1Disassembly<0b11, 0x40>; // A8.6.157
+
+// The i32imm operand $val can be used by a debugger to store more information
+// about the breakpoint.
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> {
+ // A8.6.22
+ bits<8> val;
+ let Inst{7-0} = val;
+}
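+// Example (illustrative): "bkpt #0xAB" places 0xAB in Inst{7-0}, giving the
+// halfword 0xBEAB.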
def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
[/* For disassembly only; pattern left blank */]>,
T1Encoding<0b101101> {
+ // A8.6.156
let Inst{9-5} = 0b10010;
- let Inst{3} = 1;
+ let Inst{4} = 1;
+ let Inst{3} = 1; // Big-Endian
+ let Inst{2-0} = 0b000;
}
def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
[/* For disassembly only; pattern left blank */]>,
T1Encoding<0b101101> {
+ // A8.6.156
let Inst{9-5} = 0b10010;
- let Inst{3} = 0;
+ let Inst{4} = 1;
+ let Inst{3} = 0; // Little-Endian
+ let Inst{2-0} = 0b000;
}
-// The i32imm operand $val can be used by a debugger to store more information
-// about the breakpoint.
-def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
+// Change Processor State is a system instruction -- for disassembly only.
+def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags),
+ NoItinerary, "cps$imod $iflags",
[/* For disassembly only; pattern left blank */]>,
- T1Encoding<0b101111> {
- let Inst{9-8} = 0b10;
+ T1Misc<0b0110011> {
+ // A8.6.38 & B6.1.1
+ bit imod;
+ bits<3> iflags;
+
+ let Inst{4} = imod;
+ let Inst{3} = 0;
+ let Inst{2-0} = iflags;
}
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode ==> don't care
-// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr)
-// opt{8-6} = AIF from Inst{2-0}
-// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
-//
-// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM
-// CPS which has more options.
-def tCPS : T1I<(outs), (ins cps_opt:$opt), NoItinerary, "cps$opt",
- [/* For disassembly only; pattern left blank */]>,
- T1Misc<0b0110011>;
-
// For both thumb1 and thumb2.
-let isNotDuplicable = 1 in
-def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
- "\n$cp:\n\tadd\t$dst, pc",
- [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
+let isNotDuplicable = 1, isCodeGenOnly = 1 in
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
T1Special<{0,0,?,?}> {
- let Inst{6-3} = 0b1111; // A8.6.6 Rm = pc
+ // A8.6.6
+ bits<3> dst;
+ let Inst{6-3} = 0b1111; // Rm = pc
+ let Inst{2-0} = dst;
}
-// PC relative add.
+// PC relative add (ADR).
def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, pc, $rhs", []>,
- T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+ "add\t$dst, pc, $rhs", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ // A6.2 & A8.6.10
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
+}
-// ADD rd, sp, #imm8
+// ADD <Rd>, sp, #<imm8>
// This is rematerializable, which is particularly useful for taking the
// address of locals.
-let isReMaterializable = 1 in {
+let isReMaterializable = 1 in
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
- "add\t$dst, $sp, $rhs", []>,
- T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+ "add\t$dst, $sp, $rhs", []>,
+ T1Encoding<{1,0,1,0,1,?}> {
+ // A6.2 & A8.6.8
+ bits<3> dst;
+ bits<8> rhs;
+ let Inst{10-8} = dst;
+ let Inst{7-0} = rhs;
}
-// ADD sp, sp, #imm7
+// ADD sp, sp, #<imm7>
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $rhs", []>,
- T1Misc<{0,0,0,0,0,?,?}>; // A6.2.5 & A8.6.8
+ T1Misc<{0,0,0,0,0,?,?}> {
+ // A6.2.5 & A8.6.8
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
-// SUB sp, sp, #imm7
+// SUB sp, sp, #<imm7>
+// FIXME: The encoding and the ASM string don't match up.
def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
"sub\t$dst, $rhs", []>,
- T1Misc<{0,0,0,0,1,?,?}>; // A6.2.5 & A8.6.215
+ T1Misc<{0,0,0,0,1,?,?}> {
+ // A6.2.5 & A8.6.214
+ bits<7> rhs;
+ let Inst{6-0} = rhs;
+}
-// ADD rm, sp
+// ADD <Rm>, sp
def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
"add\t$dst, $rhs", []>,
T1Special<{0,0,?,?}> {
- let Inst{6-3} = 0b1101; // A8.6.9 Encoding T1
+ // A8.6.9 Encoding T1
+ bits<4> dst;
+ let Inst{7} = dst{3};
+ let Inst{6-3} = 0b1101;
+ let Inst{2-0} = dst{2-0};
}
-// ADD sp, rm
+// ADD sp, <Rm>
def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
"add\t$dst, $rhs", []>,
T1Special<{0,0,?,?}> {
// A8.6.9 Encoding T2
+ bits<4> dst;
let Inst{7} = 1;
+ let Inst{6-3} = dst;
let Inst{2-0} = 0b101;
}
@@ -260,21 +361,37 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>,
- T1Special<{1,1,0,?}> { // A6.2.3 & A8.6.25
+ def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr",
+ [(ARMretflag)]>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
let Inst{6-3} = 0b1110; // Rm = lr
+ let Inst{2-0} = 0b000;
}
+
// Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>,
- T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
+ IIC_Br, "bx\t$Rm",
+ []>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst",
- [(brind GPR:$dst)]>,
- T1Special<{1,0,1,?}> {
- // <Rd> = Inst{7:2-0} = pc
+ def tBRIND : TI<(outs), (ins GPR:$Rm),
+ IIC_Br,
+ "mov\tpc, $Rm",
+ [(brind GPR:$Rm)]>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rm;
+ let Inst{7} = 1; // <Rd> = Inst{7:2-0} = pc
+ let Inst{6-3} = Rm;
let Inst{2-0} = 0b111;
}
}
@@ -282,28 +399,52 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// FIXME: remove when we have a way to marking a MI with these properties.
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
- "pop${p}\t$dsts", []>,
- T1Misc<{1,1,0,?,?,?,?}>;
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop_Br,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ // A8.6.121
+ bits<16> regs;
+ let Inst{8} = regs{15}; // registers = P:'0000000':register_list
+ let Inst{7-0} = regs{7-0};
+}
+// All calls clobber the non-callee saved registers. SP is marked as a use to
+// prevent stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
let isCall = 1,
+ // On non-Darwin platforms R9 is callee-saved.
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
- D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl\t${func:call}",
+ (outs), (ins t_bltarget:$func, variable_ops), IIC_Br,
+ "bl\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsNotDarwin]>;
+ Requires<[IsThumb, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx\t${func:call}",
+ (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br,
+ "blx\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
+ Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
// Also used for Thumb2
def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
@@ -313,642 +454,1002 @@ let isCall = 1,
T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24;
// ARMv4T
+ // FIXME: Should be a pseudo.
+ let isCodeGenOnly = 1 in
def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?,
(outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb1Only, IsNotDarwin]>;
+ Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
}
-// On Darwin R9 is call-clobbered.
let isCall = 1,
+ // On Darwin R9 is call-clobbered.
+ // R7 is marked as a use to prevent frame-pointer assignments from being
+ // moved above / below calls.
Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
- D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
+ D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+ Uses = [R7, SP] in {
// Also used for Thumb2
def tBLr9 : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl\t${func:call}",
+ (outs), (ins pred:$p, t_bltarget:$func, variable_ops),
+ IIC_Br, "bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-0} = func{10-0};
+ }
// ARMv5T and above, also used for Thumb2
def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx\t${func:call}",
+ (outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
+ IIC_Br, "blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T, IsDarwin]> {
+ bits<21> func;
+ let Inst{25-16} = func{20-11};
+ let Inst{13} = 1;
+ let Inst{11} = 1;
+ let Inst{10-1} = func{10-1};
+ let Inst{0} = 0; // func{0} is assumed zero
+ }
// Also used for Thumb2
- def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
- "blx\t$func",
+ def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
+ "blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>,
- T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24
+ T1Special<{1,1,1,?}> {
+ // A6.2.3 & A8.6.24
+ bits<4> func;
+ let Inst{6-3} = func;
+ let Inst{2-0} = 0b000;
+ }
// ARMv4T
+ let isCodeGenOnly = 1 in
+ // FIXME: Should be a pseudo.
def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?,
(outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb1Only, IsDarwin]>;
+ Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
}
-let isBranch = 1, isTerminator = 1 in {
- let isBarrier = 1 in {
- let isPredicable = 1 in
- def tB : T1I<(outs), (ins brtarget:$target), IIC_Br,
- "b\t$target", [(br bb:$target)]>,
- T1Encoding<{1,1,1,0,0,?}>;
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br,
+ "b\t$target", [(br bb:$target)]>,
+ T1Encoding<{1,1,1,0,0,?}> {
+ bits<11> target;
+ let Inst{10-0} = target;
+ }
// Far jump
+ // Just a pseudo for a tBL instruction. Needed to let regalloc know about
+ // the clobber of LR.
let Defs = [LR] in
- def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
- "bl\t$target\t${:comment} far jump",[]>;
-
- def tBR_JTr : T1JTI<(outs),
- (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
- [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
- Encoding16 {
- let Inst{15-7} = 0b010001101;
- let Inst{2-0} = 0b111;
- }
+ def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target),
+ Size4Bytes, IIC_Br, []>;
+
+ def tBR_JTr : tPseudoInst<(outs),
+ (ins tGPR:$target, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
+ list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
// a two-value operand where a dag node expects two operands. :(
let isBranch = 1, isTerminator = 1 in
- def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br,
- "b$cc\t$target",
+ def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br,
+ "b${p}\t$target",
[/*(ARMbrcond bb:$target, imm:$cc)*/]>,
- T1Encoding<{1,1,0,1,?,?}>;
+ T1Encoding<{1,1,0,1,?,?}> {
+ bits<4> p;
+ bits<8> target;
+ let Inst{11-8} = p;
+ let Inst{7-0} = target;
+}
// Compare and branch on zero / non-zero
let isBranch = 1, isTerminator = 1 in {
- def tCBZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
- "cbz\t$cmp, $target", []>,
- T1Misc<{0,0,?,1,?,?,?}>;
+ def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br,
+ "cbz\t$Rn, $target", []>,
+ T1Misc<{0,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
- def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br,
+ def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br,
"cbnz\t$cmp, $target", []>,
- T1Misc<{1,0,?,1,?,?,?}>;
+ T1Misc<{1,0,?,1,?,?,?}> {
+ // A8.6.27
+ bits<6> target;
+ bits<3> Rn;
+ let Inst{9} = target{5};
+ let Inst{7-3} = target{4-0};
+ let Inst{2-0} = Rn;
+ }
}
// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1111 then SEE SVC
-let isCall = 1 in {
-def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
- Encoding16 {
+let isCall = 1, Uses = [SP] in
+def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br,
+ "svc", "\t$imm", []>, Encoding16 {
+ bits<8> imm;
let Inst{15-12} = 0b1101;
- let Inst{11-8} = 0b1111;
-}
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = imm;
}
-// A8.6.16 B: Encoding T1
-// If Inst{11-8} == 0b1110 then UNDEFINED
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
+// The assembler uses 0xDEFE for a trap instruction.
let isBarrier = 1, isTerminator = 1 in
def tTRAP : TI<(outs), (ins), IIC_Br,
- ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
- let Inst{15-12} = 0b1101;
- let Inst{11-8} = 0b1110;
+ "trap", [(trap)]>, Encoding16 {
+ let Inst = 0xdefe;
}
//===----------------------------------------------------------------------===//
// Load Store Instructions.
//
+// Loads: reg/reg and reg/imm5
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
- "ldr", "\t$dst, $addr",
- [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
- T1LdSt<0b100>;
-def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
- "ldr", "\t$dst, $addr",
- []>,
- T1LdSt4Imm<{1,?,?}>;
-
-def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
- "ldrb", "\t$dst, $addr",
- [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>,
- T1LdSt<0b110>;
-def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
- "ldrb", "\t$dst, $addr",
- []>,
- T1LdSt1Imm<{1,?,?}>;
-
-def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
- "ldrh", "\t$dst, $addr",
- [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>,
- T1LdSt<0b101>;
-def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
- "ldrh", "\t$dst, $addr",
- []>,
- T1LdSt2Imm<{1,?,?}>;
+multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 1 /* Load */,
+ (outs tGPR:$Rt), (ins AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+}
+// Stores: reg/reg and reg/imm5
+multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
+ Operand AddrMode_r, Operand AddrMode_i,
+ AddrMode am, InstrItinClass itin_r,
+ InstrItinClass itin_i, string asm,
+ PatFrag opnode> {
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
+ def i : // reg/imm5
+ T1pILdStEncodeImm<imm_opc, 0 /* Store */,
+ (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
+ am, itin_i, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+}
+
+// A8.6.57 & A8.6.60
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iLoad_r, IIC_iLoad_i, "ldr",
+ UnOpFrag<(load node:$Src)>>;
+
+// A8.6.64 & A8.6.61
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
+ UnOpFrag<(zextloadi8 node:$Src)>>;
+
+// A8.6.76 & A8.6.73
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
+ UnOpFrag<(zextloadi16 node:$Src)>>;
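+// Note on the expansion (standard TableGen defm behaviour): each defm above
+// yields a reg/reg and a reg/imm5 record by suffix concatenation, e.g. tLDRr and
+// tLDRi for tLDR; the t_addrmode_rrs*/t_addrmode_is* complex patterns decide
+// which form is selected.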
let AddedComplexity = 10 in
-def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSB : // A8.6.80
+ T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_1, IIC_iLoad_bh_r,
"ldrsb", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>,
- T1LdSt<0b011>;
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
let AddedComplexity = 10 in
-def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSH : // A8.6.84
+ T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ AddrModeT1_2, IIC_iLoad_bh_r,
"ldrsh", "\t$dst, $addr",
- [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>,
- T1LdSt<0b111>;
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
- "ldr", "\t$dst, $addr",
- [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>,
- T1LdStSP<{1,?,?}>;
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
// Special instruction for restore. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
-def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
- "ldr", "\t$dst, $addr", []>,
- T1LdStSP<{1,?,?}>;
+// FIXME: Pseudo for tLDRspi
+def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$dst, $addr", []>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- "ldr", ".n\t$dst, $addr",
- [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>,
- T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
-
-// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
- isReMaterializable = 1 in
-def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- "ldr", "\t$dst, $addr", []>,
- T1LdStSP<{1,?,?}>;
-
-def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
- "str", "\t$src, $addr",
- [(store tGPR:$src, t_addrmode_s4:$addr)]>,
- T1LdSt<0b000>;
-def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
- "str", "\t$src, $addr",
- []>,
- T1LdSt4Imm<{0,?,?}>;
-
-def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
- "strb", "\t$src, $addr",
- [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>,
- T1LdSt<0b010>;
-def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
- "strb", "\t$src, $addr",
- []>,
- T1LdSt1Imm<{0,?,?}>;
-
-def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
- "strh", "\t$src, $addr",
- [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>,
- T1LdSt<0b001>;
-def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
- "strh", "\t$src, $addr",
- []>,
- T1LdSt2Imm<{0,?,?}>;
-
-def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
- "str", "\t$src, $addr",
- [(store tGPR:$src, t_addrmode_sp:$addr)]>,
- T1LdStSP<{0,?,?}>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-// Special instruction for spill. It cannot clobber condition register
-// when it's expanded by eliminateCallFramePseudoInstr().
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", ".n\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// A8.6.194 & A8.6.192
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+ t_addrmode_is4, AddrModeT1_4,
+ IIC_iStore_r, IIC_iStore_i, "str",
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+ t_addrmode_is1, AddrModeT1_1,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+let mayStore = 1, neverHasSideEffects = 1 in
+// Special instruction for spill. It cannot clobber condition register when it's
+// expanded by eliminateCallFramePseudoInstr().
+// FIXME: Pseudo for tSTRspi
+def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
"str", "\t$src, $addr", []>,
- T1LdStSP<{0,?,?}>;
+ T1LdStSP<{0,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
}
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
-// These requires base address to be written back or one of the loaded regs.
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def tLDM : T1I<(outs),
- (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr, $dsts", []>,
- T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-
-def tLDM_UPD : T1It<(outs tGPR:$wb),
- (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
- "ldm${addr:submode}${p}\t$addr!, $dsts",
- "$addr.addr = $wb", []>,
- T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq
-
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
-def tSTM_UPD : T1It<(outs tGPR:$wb),
- (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
- IIC_iStorem,
- "stm${addr:submode}${p}\t$addr!, $srcs",
- "$addr.addr = $wb", []>,
- T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
+multiclass thumb_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bits<6> T1Enc,
+ bit L_bit> {
+ def IA :
+ T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+ def IA_UPD :
+ T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>,
+ T1Encoding<T1Enc> {
+ bits<3> Rn;
+ bits<8> regs;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = regs;
+ }
+}
+
+// These require the base address to be written back or one of the loaded regs.
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu,
+ {1,1,0,0,1,?}, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu,
+ {1,1,0,0,0,?}, 0>;
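+// As with the load/store multiclasses earlier, these defms expand by suffix
+// concatenation into tLDMIA/tLDMIA_UPD and tSTMIA/tSTMIA_UPD.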
+
+} // neverHasSideEffects
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
-def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
- "pop${p}\t$dsts", []>,
- T1Misc<{1,1,0,?,?,?,?}>;
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iPop,
+ "pop${p}\t$regs", []>,
+ T1Misc<{1,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{15};
+ let Inst{7-0} = regs{7-0};
+}
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
-def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br,
- "push${p}\t$srcs", []>,
- T1Misc<{0,1,0,?,?,?,?}>;
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ IIC_iStore_m,
+ "push${p}\t$regs", []>,
+ T1Misc<{0,1,0,?,?,?,?}> {
+ bits<16> regs;
+ let Inst{8} = regs{14};
+ let Inst{7-0} = regs{7-0};
+}
//===----------------------------------------------------------------------===//
// Arithmetic Instructions.
//
+// Helper classes for encoding T1pI patterns:
+class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rn;
+}
+class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1pI<oops, iops, itin, opc, asm, pattern>,
+ T1Misc<opA> {
+ bits<3> Rm;
+ bits<3> Rd;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sI patterns:
+class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rd;
+ bits<3> Rn;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rm;
+ bits<3> Rn;
+ bits<3> Rd;
+ let Inst{8-6} = Rm;
+ let Inst{5-3} = Rn;
+ let Inst{2-0} = Rd;
+}
+class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sI<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rd;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
+}
+
+// Helper classes for encoding T1sIt patterns:
+class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1DataProcessing<opA> {
+ bits<3> Rdn;
+ bits<3> Rm;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rdn;
+}
+class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T1sIt<oops, iops, itin, opc, asm, pattern>,
+ T1General<opA> {
+ bits<3> Rdn;
+ bits<8> imm8;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = imm8;
+}
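+// Worked example (illustrative): tADC below uses T1sItDPEncode<0b0101, ...>, so
+// its halfword is 0b010000 : 0101 : Rm : Rdn; with Rm = r2 and Rdn = r1 that is
+// 0x4151.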
+
// Add with carry register
let isCommutable = 1, Uses = [CPSR] in
-def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "adc", "\t$dst, $rhs",
- [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0101>;
+def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>;
// Add immediate
-def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>,
- T1General<0b01110>;
+def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
-def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "add", "\t$dst, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>,
- T1General<{1,1,0,?,?}>;
+def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>;
// Add register
let isCommutable = 1 in
-def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "add", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>,
- T1General<0b01100>;
+def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>;
let neverHasSideEffects = 1 in
-def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- "add", "\t$dst, $rhs", []>,
- T1Special<{0,0,?,?}>;
+def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
-// And register
+// AND register
let isCommutable = 1 in
-def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "and", "\t$dst, $rhs",
- [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0000>;
+def tAND : // A8.6.12
+ T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "and", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>;
// ASR immediate
-def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "asr", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>,
- T1General<{0,1,0,?,?}>;
+def tASRri : // A8.6.14
+ T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "asr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
// ASR register
-def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "asr", "\t$dst, $rhs",
- [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0100>;
+def tASRrr : // A8.6.15
+ T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "asr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>;
// BIC register
-def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "bic", "\t$dst, $rhs",
- [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>,
- T1DataProcessing<0b1110>;
+def tBIC : // A8.6.20
+ T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "bic", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>;
// CMN register
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
// Compare-to-zero still works out, just not the relationals
-//def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
-// "cmn", "\t$lhs, $rhs",
-// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>,
-// T1DataProcessing<0b1011>;
-def tCMNz : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmn", "\t$lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>,
- T1DataProcessing<0b1011>;
-}
+//def tCMN : // A8.6.33
+// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs),
+// IIC_iCMPr,
+// "cmn", "\t$lhs, $rhs",
+// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMNz : // A8.6.33
+ T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmn", "\t$Rn, $Rm",
+ [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>;
+
+} // isCompare = 1, Defs = [CPSR]
// CMP immediate
-let Defs = [CPSR] in {
-def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
- "cmp", "\t$lhs, $rhs",
- [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>,
- T1General<{1,0,1,?,?}>;
-def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
- "cmp", "\t$lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>,
- T1General<{1,0,1,?,?}>;
+let isCompare = 1, Defs = [CPSR] in {
+def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi,
+ "cmp", "\t$Rn, $imm8",
+ [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>,
+ T1General<{1,0,1,?,?}> {
+ // A8.6.35
+ bits<3> Rn;
+ bits<8> imm8;
+ let Inst{10-8} = Rn;
+ let Inst{7-0} = imm8;
}
// CMP register
-let Defs = [CPSR] in {
-def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmp", "\t$lhs, $rhs",
- [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>,
- T1DataProcessing<0b1010>;
-def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "cmp", "\t$lhs, $rhs",
- [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>,
- T1DataProcessing<0b1010>;
-
-def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
- "cmp", "\t$lhs, $rhs", []>,
- T1Special<{0,1,?,?}>;
-def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
- "cmp", "\t$lhs, $rhs", []>,
- T1Special<{0,1,?,?}>;
+def tCMPr : // A8.6.36 T1
+ T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm",
+ [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>;
+
+def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr,
+ "cmp", "\t$Rn, $Rm", []>,
+ T1Special<{0,1,?,?}> {
+ // A8.6.36 T2
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{7} = Rn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rn{2-0};
}
+} // isCompare = 1, Defs = [CPSR]
// XOR register
let isCommutable = 1 in
-def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "eor", "\t$dst, $rhs",
- [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0001>;
+def tEOR : // A8.6.45
+ T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "eor", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>;
// LSL immediate
-def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "lsl", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>,
- T1General<{0,0,0,?,?}>;
+def tLSLri : // A8.6.88
+ T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsl", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
// LSL register
-def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "lsl", "\t$dst, $rhs",
- [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0010>;
+def tLSLrr : // A8.6.89
+ T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsl", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>;
// LSR immediate
-def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- "lsr", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>,
- T1General<{0,0,1,?,?}>;
+def tLSRri : // A8.6.90
+ T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5),
+ IIC_iMOVsi,
+ "lsr", "\t$Rd, $Rm, $imm5",
+ [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> {
+ bits<5> imm5;
+ let Inst{10-6} = imm5;
+}
// LSR register
-def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "lsr", "\t$dst, $rhs",
- [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0011>;
-
-// move register
-def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "mov", "\t$dst, $src",
- [(set tGPR:$dst, imm0_255:$src)]>,
- T1General<{1,0,0,?,?}>;
+def tLSRrr : // A8.6.91
+ T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "lsr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move register
+let isMoveImm = 1 in
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
+ "mov", "\t$Rd, $imm8",
+ [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ T1General<{1,0,0,?,?}> {
+ // A8.6.96
+ bits<3> Rd;
+ bits<8> imm8;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = imm8;
+}
// TODO: A7-73: MOV(2) - mov setting flag.
-
let neverHasSideEffects = 1 in {
// FIXME: Make this predicable.
-def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mov\t$dst, $src", []>,
- T1Special<0b1000>;
+def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<0b1000> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bits {7-6} are encoded by the T1Special value.
+ let Inst{5-3} = Rm{2-0};
+ let Inst{2-0} = Rd{2-0};
+}
let Defs = [CPSR] in
-def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "movs\t$dst, $src", []>, Encoding16 {
+def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "movs\t$Rd, $Rm", []>, Encoding16 {
+ // A8.6.97
+ bits<3> Rd;
+ bits<3> Rm;
let Inst{15-6} = 0b0000000000;
+ let Inst{5-3} = Rm;
+ let Inst{2-0} = Rd;
}
// FIXME: Make these predicable.
-def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov\t$dst, $src", []>,
- T1Special<{1,0,0,?}>;
-def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mov\t$dst, $src", []>,
- T1Special<{1,0,?,0}>;
-def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov\t$dst, $src", []>,
- T1Special<{1,0,?,?}>;
+def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,0,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bit {7} is encoded by the T1Special value.
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
+def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,?,0}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ // Bit {6} is encoded by the T1Special value.
+ let Inst{7} = Rd{3};
+ let Inst{5-3} = Rm{2-0};
+ let Inst{2-0} = Rd{2-0};
+}
+def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov\t$Rd, $Rm", []>,
+ T1Special<{1,0,?,?}> {
+ // A8.6.97
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rd{2-0};
+}
} // neverHasSideEffects
-// multiply register
+// Multiply register
let isCommutable = 1 in
-def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32,
- "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */
- [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b1101>;
-
-// move inverse register
-def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
- "mvn", "\t$dst, $src",
- [(set tGPR:$dst, (not tGPR:$src))]>,
- T1DataProcessing<0b1111>;
-
-// bitwise or register
+def tMUL : // A8.6.105 T1
+ T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMUL32,
+ "mul", "\t$Rdn, $Rm, $Rdn",
+ [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>;
+
+// Move inverse register
+def tMVN : // A8.6.107
+ T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr,
+ "mvn", "\t$Rd, $Rn",
+ [(set tGPR:$Rd, (not tGPR:$Rn))]>;
+
+// Bitwise or register
let isCommutable = 1 in
-def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "orr", "\t$dst, $rhs",
- [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b1100>;
-
-// swaps
-def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "rev", "\t$dst, $src",
- [(set tGPR:$dst, (bswap tGPR:$src))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{1,0,1,0,0,0,?}>;
-
-def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "rev16", "\t$dst, $src",
- [(set tGPR:$dst,
- (or (and (srl tGPR:$src, (i32 8)), 0xFF),
- (or (and (shl tGPR:$src, (i32 8)), 0xFF00),
- (or (and (srl tGPR:$src, (i32 8)), 0xFF0000),
- (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{1,0,1,0,0,1,?}>;
-
-def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "revsh", "\t$dst, $src",
- [(set tGPR:$dst,
- (sext_inreg
- (or (srl (and tGPR:$src, 0xFF00), (i32 8)),
- (shl tGPR:$src, (i32 8))), i16))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{1,0,1,0,1,1,?}>;
-
-// rotate right register
-def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr,
- "ror", "\t$dst, $rhs",
- [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0111>;
-
-// negate register
-def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi,
- "rsb", "\t$dst, $src, #0",
- [(set tGPR:$dst, (ineg tGPR:$src))]>,
- T1DataProcessing<0b1001>;
+def tORR : // A8.6.114
+ T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iBITr,
+ "orr", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>;
+
+// Swaps
+def tREV : // A8.6.134
+ T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (bswap tGPR:$Rm))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREV16 : // A8.6.135
+ T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "rev16", "\t$Rd, $Rm",
+ [(set tGPR:$Rd,
+ (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def tREVSH : // A8.6.136
+ T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "revsh", "\t$Rd, $Rm",
+ [(set tGPR:$Rd,
+ (sext_inreg
+ (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+ (shl tGPR:$Rm, (i32 8))), i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Rotate right register
+def tROR : // A8.6.139
+ T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iMOVsr,
+ "ror", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>;
+
+// Negate register
+def tRSB : // A8.6.141
+ T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn),
+ IIC_iALUi,
+ "rsb", "\t$Rd, $Rn, #0",
+ [(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
// Subtract with carry register
let Uses = [CPSR] in
-def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "sbc", "\t$dst, $rhs",
- [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>,
- T1DataProcessing<0b0110>;
+def tSBC : // A8.6.151
+ T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sbc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>;
// Subtract immediate
-def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>,
- T1General<0b01111>;
-
-def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi,
- "sub", "\t$dst, $rhs",
- [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>,
- T1General<{1,1,1,?,?}>;
-
-// subtract register
-def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
- "sub", "\t$dst, $lhs, $rhs",
- [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>,
- T1General<0b01101>;
+def tSUBi3 : // A8.6.210 T1
+ T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
+ "sub", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+}
-// TODO: A7-96: STMIA - store multiple.
+def tSUBi8 : // A8.6.210 T2
+ T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8),
+ IIC_iALUi,
+ "sub", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>;
+
+// Subtract register
+def tSUBrr : // A8.6.212
+ T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "sub", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>;
-// sign-extend byte
-def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "sxtb", "\t$dst, $src",
- [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{0,0,1,0,0,1,?}>;
-
-// sign-extend short
-def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "sxth", "\t$dst, $src",
- [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{0,0,1,0,0,0,?}>;
-
-// test
-let isCommutable = 1, Defs = [CPSR] in
-def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
- "tst", "\t$lhs, $rhs",
- [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>,
- T1DataProcessing<0b1000>;
-
-// zero-extend byte
-def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "uxtb", "\t$dst, $src",
- [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{0,0,1,0,1,1,?}>;
-
-// zero-extend short
-def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
- "uxth", "\t$dst, $src",
- [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
- Requires<[IsThumb1Only, HasV6]>,
- T1Misc<{0,0,1,0,1,0,?}>;
+// TODO: A7-96: STMIA - store multiple.
+// Sign-extend byte
+def tSXTB : // A8.6.222
+ T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Sign-extend short
+def tSXTH : // A8.6.224
+ T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "sxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Test
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : // A8.6.230
+ T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr,
+ "tst", "\t$Rn, $Rm",
+ [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>;
+
+// Zero-extend byte
+def tUXTB : // A8.6.262
+ T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxtb", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+// Zero-extend short
+def tUXTH : // A8.6.264
+ T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
+ IIC_iUNAr,
+ "uxth", "\t$Rd, $Rm",
+ [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary, "${:comment} tMOVCCr $cc",
+ NoItinerary,
[/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
// 16-bit movcc in IT blocks for Thumb2.
let neverHasSideEffects = 1 in {
-def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
- "mov", "\t$dst, $rhs", []>,
- T1Special<{1,0,?,?}>;
+def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr,
+ "mov", "\t$Rdn, $Rm", []>,
+ T1Special<{1,0,?,?}> {
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+}
+
+let isMoveImm = 1 in
+def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi,
+ "mov", "\t$Rdn, $Rm", []>,
+ T1General<{1,0,0,?,?}> {
+ bits<3> Rdn;
+ bits<8> Rm;
+ let Inst{10-8} = Rdn;
+ let Inst{7-0} = Rm;
+}
-def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
- "mov", "\t$dst, $rhs", []>,
- T1General<{1,0,0,?,?}>;
} // neverHasSideEffects
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p\t$dst, #$label", []>,
- T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
-} // neverHasSideEffects
-def tLEApcrelJT : T1I<(outs tGPR:$dst),
- (ins i32imm:$label, nohash_imm:$id, pred:$p),
- IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
- T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>,
+ T1Encoding<{1,0,1,0,0,?}> {
+ bits<3> Rd;
+ bits<8> addr;
+ let Inst{10-8} = Rd;
+ let Inst{7-0} = addr;
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size2Bytes, IIC_iALUi, []>;
+
+def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size2Bytes, IIC_iALUi, []>;
+
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class tMovRCopro<string opc, bit direction>
+ : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
+def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
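+// For instance (example values only), "mrc p15, 0, r0, c13, c0, 3" would fill
+// cop = 15, opc1 = 0, Rt = r0, CRn = c13, CRm = c0, opc2 = 3 in the fields above.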
+
+class tMovRRCopro<string opc, bit direction>
+ : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>;
+def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions. For disassembly only.
+//
+def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
//===----------------------------------------------------------------------===//
// TLS Instructions
//
// __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1,
- Defs = [R0, LR] in {
- def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
- "bl\t__aeabi_read_tp",
- [(set R0, ARMthread_pointer)]>;
+let isCall = 1, Defs = [R0, LR], Uses = [SP] in
+def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
+ "bl\t__aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]> {
+ // Encoding is 0xf7fffffe.
+ let Inst = 0xf7fffffe;
}
+//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
-// eh_sjlj_setjmp() is an instruction sequence to store the return
-// address and save #0 in R0 for the non-longjmp case.
-// Since by its nature we may be coming from some other function to get
-// here, and we're using the stack frame for the containing function to
-// save/restore registers, we can't keep anything live in regs across
-// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-// when we get here from a longjmp(). We force everthing out of registers
-// except for our own input by listing the relevant registers in Defs. By
-// doing so, we also cause the prologue/epilogue code to actively preserve
-// all of the callee-saved resgisters, which is exactly what we want.
-// $val is a scratch register for our use.
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
- isBarrier = 1 in {
- def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
- [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-}
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "","",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
// FIXME: Non-Darwin version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
- Defs = [ R7, LR, SP ] in {
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
+ Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
- AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "ldr\t$scratch, [$src, #8]\n\t"
- "mov\tsp, $scratch\n\t"
- "ldr\t$scratch, [$src, #4]\n\t"
- "ldr\tr7, [$src]\n\t"
- "bx\t$scratch", "",
- [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsDarwin]>;
-}
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary, "", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
+// Comparisons
+def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
+ (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>;
+def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
+ (tCMPr tGPR:$Rn, tGPR:$Rm)>;
+
// Add with carry
def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
(tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
@@ -991,27 +1492,42 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
// zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_s1:$addr),
- (tLDRB t_addrmode_s1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
+ (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
+ (tLDRBi t_addrmode_is1:$addr)>;
// extload -> zextload
-def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
-def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
- (tSXTB (tLDRB t_addrmode_s1:$addr))>,
- Requires<[IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi16 t_addrmode_s2:$addr),
- (tSXTH (tLDRH t_addrmode_s2:$addr))>,
- Requires<[IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sextloadi8 t_addrmode_s1:$addr),
- (tASRri (tLSLri (tLDRB t_addrmode_s1:$addr), 24), 24)>;
-def : T1Pat<(sextloadi16 t_addrmode_s1:$addr),
- (tASRri (tLSLri (tLDRH t_addrmode_s1:$addr), 16), 16)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tSXTB (tLDRBi t_addrmode_is1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tSXTH (tLDRHi t_addrmode_is2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
+ (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
+def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
+ (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
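
The shift-based patterns above fall back on the usual lsl/asr idiom for sign extension when sxtb/sxth are unavailable. A short C++ illustration (not part of the patch; it assumes arithmetic right shift of signed values, which is what tASRri provides):

    #include <cstdint>

    // A zero-extended byte load followed by (x << 24) >> 24 recovers the sign,
    // matching (tASRri (tLSLri (tLDRB* $addr), 24), 24).
    static int32_t sextByteViaShifts(uint32_t Loaded) {
      return (int32_t)(Loaded << 24) >> 24;
    }
    // Likewise for halfwords, with a shift amount of 16.
    static int32_t sextHalfViaShifts(uint32_t Loaded) {
      return (int32_t)(Loaded << 16) >> 16;
    }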
// Large immediate handling.
@@ -1028,8 +1544,7 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary,
- "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
- Requires<[IsThumb1Only]>;
+ Requires<[IsThumb, IsThumb1Only]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 6ba0a44..0e01be5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -21,16 +21,12 @@ def it_mask : Operand<i32> {
let PrintMethod = "printThumbITMask";
}
-// Table branch address
-def tb_addrmode : Operand<i32> {
- let PrintMethod = "printTBAddrMode";
-}
-
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
[shl,srl,sra,rotr]> {
+ let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
let MIOperandInfo = (ops rGPR, i32imm);
}
@@ -47,11 +43,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
-// immediate splatted into multiple bytes of the word. t2_so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into t2_so_imm instructions: the 8-bit immediate is the least significant
-// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
+// immediate splatted into multiple bytes of the word.
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
+ let EncoderMethod = "getT2SOImmOpValue";
+}
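
For reference, a brief C++ sketch (illustration only, not part of the patch) of the splat forms the t2_so_imm comment refers to: the plain 8-bit value, the two alternating-byte forms, and the full four-byte replication; the rotated-immediate case covers everything else.

    #include <cstdint>

    // Hypothetical illustration: 'Byte' is the 8-bit payload, 'Form' the two
    // control bits selecting how it is replicated across the 32-bit word.
    static uint32_t t2SplatImm(uint32_t Byte, unsigned Form) {
      switch (Form) {
      case 0:  return Byte;                                               // 000000XY
      case 1:  return (Byte << 16) | Byte;                                // 00XY00XY
      case 2:  return (Byte << 24) | (Byte << 8);                         // XY00XY00
      default: return (Byte << 24) | (Byte << 16) | (Byte << 8) | Byte;   // XYXYXYXY
      }
    }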
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -63,7 +58,7 @@ def t2_so_imm_not : Operand<i32>,
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1;
+ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_neg_XFORM>;
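
The switch from an int cast to a uint32_t cast in the negation above sidesteps signed overflow for 0x80000000. A standalone C++ illustration (the 64-bit starting value stands in for what getZExtValue() would return):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t ZExt = 0x80000000ull;      // zero-extended operand value
      uint32_t Neg = -((uint32_t)ZExt);   // well defined: wraps back to 0x80000000
      // int Bad = -((int)ZExt);          // negating INT_MIN: signed overflow
      printf("negated: 0x%08x\n", Neg);
      return 0;
    }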
// Break t2_so_imm's up into two pieces. This handles immediates with up to 16
@@ -128,27 +123,41 @@ def imm0_255_not : PatLeaf<(i32 imm), [{
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
- let PrintMethod = "printT2AddrModeImm12Operand";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let EncoderMethod = "getAddrModeImm12OpValue";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
}
+// ADR instruction labels.
+def t2adrlabel : Operand<i32> {
+ let EncoderMethod = "getT2AdrLabelOpValue";
+}
+
+
// t2addrmode_imm8 := reg +/- imm8
def t2addrmode_imm8 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand";
+ let EncoderMethod = "getT2AddrModeImm8OpValue";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
}
def t2am_imm8_offset : Operand<i32>,
- ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let EncoderMethod = "getT2AddrModeImm8OffsetOpValue";
+ let ParserMatchClass = MemMode5AsmOperand;
}
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
-def t2addrmode_imm8s4 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> {
+def t2addrmode_imm8s4 : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4Operand";
+ let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
}
def t2am_imm8s4_offset : Operand<i32> {
@@ -159,7 +168,9 @@ def t2am_imm8s4_offset : Operand<i32> {
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
+ let EncoderMethod = "getT2AddrModeSORegOpValue";
let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
+ let ParserMatchClass = MemMode5AsmOperand;
}
@@ -167,45 +178,294 @@ def t2addrmode_so_reg : Operand<i32>,
// Multiclass helpers...
//
+
+class T2OneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
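
All of these *RegImm encoding classes scatter the 12-bit immediate the same way. Purely as an illustration (not part of the patch), this is the packing that the Inst{26}/Inst{14-12}/Inst{7-0} assignments amount to:

    #include <cstdint>
    #include <cassert>

    // Hypothetical sketch: place a 12-bit immediate into a 32-bit Thumb-2
    // encoding word at the i:imm3:imm8 positions used by the classes here.
    static uint32_t packT2Imm12(uint32_t Inst, uint32_t Imm12) {
      assert(Imm12 < (1u << 12) && "immediate wider than 12 bits");
      Inst |= ((Imm12 >> 11) & 0x1) << 26;   // Inst{26}    = imm{11}
      Inst |= ((Imm12 >> 8)  & 0x7) << 12;   // Inst{14-12} = imm{10-8}
      Inst |=  (Imm12        & 0xFF);        // Inst{7-0}   = imm{7-0}
      return Inst;
    }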
+
+
+class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+
+class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2TwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2sTwoReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+
+class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
+
+class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<5> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+}
+
+class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
+class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2sI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> ShiftedRm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = ShiftedRm{3-0};
+ let Inst{5-4} = ShiftedRm{6-5};
+ let Inst{14-12} = ShiftedRm{11-9};
+ let Inst{7-6} = ShiftedRm{8-7};
+}
+
+class T2FourReg<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
+
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rd;
+ let Inst{3-0} = Rm;
+}
+
+class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-23} = 0b111110111;
+ let Inst{22-20} = opc22_20;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = RdHi;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Cheap = 0, bit ReMat = 0> {
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
- opc, "\t$dst, $src",
- [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
+ def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
- opc, ".w\t$dst, $src",
- [(set rGPR:$dst, (opnode rGPR:$src))]> {
+ def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
- opc, ".w\t$dst, $src",
- [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
+ def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rd, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
}
}
@@ -213,94 +473,97 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide = ""> {
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, "\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
- opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"),
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
}
}
/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
// the ".w" prefix to indicate that they are wide.
-multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
/// T2I_rbin_irs - Same as T2I_bin_irs except the order of operands is
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
/// it is equivalent to the T2I_bin_irs counterpart.
multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
- opc, ".w\t$dst, $rhs, $lhs",
- [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
- opc, "\t$dst, $rhs, $lhs",
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
- opc, "\t$dst, $rhs, $lhs",
- [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = ?; // The S bit.
}
}
/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
-let Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -308,9 +571,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
- !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
+ def rr : T2ThreeReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -321,9 +585,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -340,51 +605,58 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
- def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
let Inst{23-21} = op23_21;
- let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
}
// 12-bit imm
- def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
- !strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+ def ri12 : T2I<
+ (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi,
+ !strconcat(opc, "w"), "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24} = 0;
+ let Inst{26} = imm{11};
+ let Inst{25-24} = 0b10;
let Inst{23-21} = op23_21;
let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = Rn;
let Inst{15} = 0;
+ let Inst{14-12} = imm{10-8};
+ let Inst{11-8} = Rd;
+ let Inst{7-0} = imm{7-0};
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
let Inst{23-21} = op23_21;
- let Inst{20} = 0; // The S bit.
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
let Inst{23-21} = op23_21;
- let Inst{20} = 0; // The S bit.
}
}
@@ -395,50 +667,49 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, "\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
+ def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
}
}
// Carry setting variants
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- opc, "\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
+ def ri : T2sTwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ opc, "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -447,9 +718,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
+ def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -461,9 +732,10 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
+ def rs : T2sTwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -476,12 +748,13 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
/// version is not needed since this is only for codegen.
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
- !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
+ def ri : T2TwoRegImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -489,9 +762,10 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
- !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
+ def rs : T2TwoRegShiftedReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm",
+ [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -504,18 +778,20 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
+ def ri : T2sTwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi,
+ opc, ".w\t$Rd, $Rm, $imm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
- opc, ".w\t$dst, $lhs, $rhs",
- [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
+ def rr : T2sThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr,
+ opc, ".w\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -528,11 +804,14 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
let isCompare = 1, Defs = [CPSR] in {
-multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
- opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, t2_so_imm:$rhs)]> {
+ def ri : T2OneRegCmpImm<
+ (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii,
+ opc, ".w\t$Rn, $imm",
+ [(opnode GPR:$Rn, t2_so_imm:$imm)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -541,7 +820,8 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
+ def rr : T2TwoRegCmp<
+ (outs), (ins GPR:$lhs, rGPR:$rhs), iir,
opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
@@ -554,9 +834,10 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi,
- opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, t2_so_reg:$rhs)]> {
+ def rs : T2OneRegCmpShiftedReg<
+ (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
+ opc, ".w\t$Rn, $ShiftedRm",
+ [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -567,20 +848,29 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
-multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi,
- opc, ".w\t$dst, $addr",
- [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> {
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
let Inst{23} = 1;
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi,
- opc, "\t$dst, $addr",
- [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> {
+ def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -591,10 +881,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
// Offset: index==TRUE, wback==FALSE
let Inst{10} = 1; // The P bit.
let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr,
- opc, ".w\t$dst, $addr",
- [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> {
+ def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -602,10 +900,20 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
}
- def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
- opc, ".w\t$dst, $addr",
- [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> {
+
+ // FIXME: Is the pci variant actually needed?
+ def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -614,22 +922,35 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
let Inst{19-16} = 0b1111; // Rn
+ bits<4> Rt;
+ bits<12> addr;
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-0} = addr{11-0};
}
}
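
The i12 form above treats the t2addrmode_imm12 operand as a single 17-bit field. A small C++ sketch (illustrative only; the struct and field names are assumptions) of the {Rn, U, imm12} split that the inline comments describe:

    #include <cstdint>

    struct T2AddrImm12 {
      unsigned Rn;     // base register number, addr{16-13} -> Inst{19-16}
      unsigned U;      // add/subtract bit,     addr{12}    -> Inst{23}
      unsigned Imm12;  // byte offset,          addr{11-0}  -> Inst{11-0}
    };

    // Hypothetical helper mirroring the bit slicing in the 'i12' def above.
    static T2AddrImm12 splitAddr17(uint32_t Addr17) {
      T2AddrImm12 A;
      A.Rn    = (Addr17 >> 13) & 0xF;
      A.U     = (Addr17 >> 12) & 0x1;
      A.Imm12 =  Addr17        & 0xFFF;
      return A;
    }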
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
-multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei,
- opc, ".w\t$src, $addr",
- [(opnode GPR:$src, t2addrmode_imm12:$addr)]> {
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iis, PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
let Inst{20} = 0; // !load
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei,
- opc, "\t$src, $addr",
- [(opnode GPR:$src, t2addrmode_imm8:$addr)]> {
+ def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -638,24 +959,40 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
// Offset: index==TRUE, wback==FALSE
let Inst{10} = 1; // The P bit.
let Inst{8} = 0; // The W bit.
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{9} = addr{8}; // U
+ let Inst{7-0} = addr{7-0}; // imm
}
- def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer,
- opc, ".w\t$src, $addr",
- [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> {
+ def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis,
+ opc, ".w\t$Rt, $addr",
+ [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
let Inst{20} = 0; // !load
let Inst{11-6} = 0b000000;
+
+ bits<4> Rt;
+ let Inst{15-12} = Rt;
+
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm
}
}
-/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- opc, ".w\t$dst, $src",
- [(set rGPR:$dst, (opnode rGPR:$src))]> {
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -664,25 +1001,27 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
- opc, ".w\t$dst, $src, ror $rot",
- [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
+ opc, ".w\t$Rd, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = {?,?}; // rotate
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
}
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
-multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- opc, "\t$dst, $src",
- [(set rGPR:$dst, (opnode rGPR:$src))]>,
- Requires<[HasT2ExtractPack]> {
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -691,25 +1030,27 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
- opc, "\t$dst, $src, ror $rot",
- [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot",
+ [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = {?,?}; // rotate
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
}
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
-multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- opc, "\t$dst, $src", []> {
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+ def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
+ opc, "\t$Rd, $Rm", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -718,25 +1059,27 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
- opc, "\t$dst, $src, ror $rot", []> {
+ def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
+ opc, "\t$Rd, $Rm, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = {?,?}; // rotate
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
}
-/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
- opc, "\t$dst, $LHS, $RHS",
- [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
- Requires<[HasT2ExtractPack]> {
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -744,25 +1087,28 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
- IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set rGPR:$dst, (opnode rGPR:$LHS,
- (rotr rGPR:$RHS, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set rGPR:$Rd, (opnode rGPR:$Rn,
+ (rotr rGPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = {?,?}; // rotate
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
}
// DO variant - disassembly only, no pattern
-multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
- opc, "\t$dst, $LHS, $RHS", []> {
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr,
+ opc, "\t$Rd, $Rn, $Rm", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -770,14 +1116,16 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
- IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
+ def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+ IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = {?,?}; // rotate
+
+ bits<2> rot;
+ let Inst{5-4} = rot{1-0}; // rotate
}
}
@@ -789,24 +1137,23 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// Miscellaneous Instructions.
//
+class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<12> label;
+
+ let Inst{11-8} = Rd;
+ let Inst{26} = label{11};
+ let Inst{14-12} = label{10-8};
+ let Inst{7-0} = label{7-0};
+}
+
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr${p}.w\t$dst, #$label", []> {
- let Inst{31-27} = 0b11110;
- let Inst{25-24} = 0b10;
- // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
- let Inst{22} = 0;
- let Inst{20} = 0;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
-}
-} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
- (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr${p}.w\t$dst, #${label}_${id}", []> {
+def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
+ (ins t2adrlabel:$addr, pred:$p),
+ IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -814,76 +1161,88 @@ def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
let Inst{20} = 0;
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
-}
+ bits<4> Rd;
+ bits<13> addr;
+ let Inst{11-8} = Rd;
+ let Inst{23} = addr{12};
+ let Inst{21} = addr{12};
+ let Inst{26} = addr{11};
+ let Inst{14-12} = addr{10-8};
+ let Inst{7-0} = addr{7-0};
+}
+
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
+ Size4Bytes, IIC_iALUi, []>;
+def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$label, nohash_imm:$id, pred:$p),
+ Size4Bytes, IIC_iALUi,
+ []>;
+
+
+// FIXME: None of these add/sub SP special instructions should be necessary
+// at all for thumb2 since they use the same encodings as the generic
+// add/sub instructions. In thumb1 we need them since they have dedicated
+// encodings. At the least, they should be pseudo instructions.
// ADD r, sp, {so_imm|i12}
-def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []> {
+let isCodeGenOnly = 1 in {
+def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b1000;
- let Inst{20} = ?; // The S bit.
- let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
-def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> {
+def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+ IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> {
let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-21} = 0b0000;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1101; // Rn = sp
+ let Inst{25-20} = 0b100000;
let Inst{15} = 0;
}
// ADD r, sp, so_reg
-def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []> {
+def t2ADDrSPs : T2sTwoRegShiftedReg<
+ (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
+ IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1000;
- let Inst{20} = ?; // The S bit.
- let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
// SUB r, sp, {so_imm|i12}
-def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []> {
+def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
+ IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b1101;
- let Inst{20} = ?; // The S bit.
- let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
-def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- IIC_iALUi, "subw", "\t$dst, $sp, $imm", []> {
+def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
+ IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> {
let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-21} = 0b0101;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1101; // Rn = sp
+ let Inst{25-20} = 0b101010;
let Inst{15} = 0;
}
// SUB r, sp, so_reg
-def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
IIC_iALUsi,
- "sub", "\t$dst, $sp, $rhs", []> {
+ "sub", "\t$Rd, $Rn, $imm", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b1101;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
+} // end isCodeGenOnly = 1
// Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
- "sdiv", "\t$dst, $a, $b",
- [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
- Requires<[HasDivide]> {
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -891,10 +1250,10 @@ def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
- "udiv", "\t$dst, $a, $b",
- [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
- Requires<[HasDivide]> {
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -908,26 +1267,26 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
-defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>;
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si,
+ UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
-defm t2LDRH : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
-defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
-defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
-defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si,
+ UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
- (ins i32imm:$addr), IIC_iLoadi,
- "ldrd", "\t$dst1, $addr", []> {
- let Inst{19-16} = 0b1111; // Rn
-}
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
@@ -976,70 +1335,71 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
// not via pattern.
// Indexed loads
+
let mayLoad = 1, neverHasSideEffects = 1 in {
-def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldr", "\t$dst, $addr!", "$addr.base = $base_wb",
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn",
[]>;
-def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldr", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
+ "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn",
[]>;
-def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb",
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn",
[]>;
-def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
[]>;
-def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb",
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn",
[]>;
-def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn",
[]>;
-def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb",
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn",
[]>;
-def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn",
[]>;
-def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
+def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
- "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb",
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn",
[]>;
-def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
- "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
+def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
+ (ins GPR:$base, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
+ "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn",
[]>;
-} // mayLoad = 1, neverHasSideEffects = 1
+} // mayLoad = 1, neverHasSideEffects = 1
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
// for disassembly only.
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
-class T2IldT<bit signed, bits<2> type, string opc>
- : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
- "\t$dst, $addr", []> {
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -1048,74 +1408,83 @@ class T2IldT<bit signed, bits<2> type, string opc>
let Inst{20} = 1; // load
let Inst{11} = 1;
let Inst{10-8} = 0b110; // PUW.
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
}
-def t2LDRT : T2IldT<0, 0b10, "ldrt">;
-def t2LDRBT : T2IldT<0, 0b00, "ldrbt">;
-def t2LDRHT : T2IldT<0, 0b01, "ldrht">;
-def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">;
-def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">;
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
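The new T2IldT class wires its operands into the encoding through the let Inst{...} statements above: Rt lands in bits 15-12, the base register comes from addr{12-9} into bits 19-16, and the 8-bit offset from addr{7-0} into bits 7-0. A minimal C++ sketch of that packing, assuming addr is the flattened 13-bit t2addrmode_imm8 operand; the helper name is illustrative, not LLVM's encoder:

    #include <cassert>
    #include <cstdint>

    // Mirrors the T2IldT field assignments above; 'addr' is assumed to be the
    // flattened 13-bit t2addrmode_imm8 operand (Rn in bits 12-9, imm8 in 7-0).
    uint32_t packT2IldTFields(uint32_t inst, unsigned rt, uint32_t addr) {
      assert(rt < 16 && addr < (1u << 13));
      inst |= (rt & 0xFu) << 12;           // Inst{15-12} = Rt
      inst |= ((addr >> 9) & 0xFu) << 16;  // Inst{19-16} = addr{12-9} (Rn)
      inst |= addr & 0xFFu;                // Inst{7-0}   = addr{7-0}  (imm8)
      return inst;
    }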
// Store
-defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
- (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
- IIC_iStorer, "strd", "\t$src1, $addr", []>;
+ (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>;
// Indexed stores
def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "str", "\t$src, [$base], $offset", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
+ "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
+ "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "strh", "\t$src, [$base], $offset", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
- "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
- "strb", "\t$src, [$base], $offset", "$base = $base_wb",
+ (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
+ "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
[(set GPR:$base_wb,
- (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+ (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
-class T2IstT<bits<2> type, string opc>
- : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc,
- "\t$src, $addr", []> {
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ "\t$Rt, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1124,51 +1493,62 @@ class T2IstT<bits<2> type, string opc>
let Inst{20} = 0; // store
let Inst{11} = 1;
let Inst{10-8} = 0b110; // PUW
+
+ bits<4> Rt;
+ bits<13> addr;
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = addr{12-9};
+ let Inst{7-0} = addr{7-0};
}
-def t2STRT : T2IstT<0b10, "strt">;
-def t2STRBT : T2IstT<0b00, "strbt">;
-def t2STRHT : T2IstT<0b01, "strht">;
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
// For disassembly only.
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
- "ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>;
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
- "ldrd", "\t$dst1, $dst2, [$base], $imm", []>;
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
- (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
- NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>;
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
- (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
- NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>;
+ (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
// T2Ipl (Preload Data/Instruction) signals to the memory system that a data or
// instruction access is likely to occur soon. These are for disassembly only.
-//
-// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass T2Ipl<bit instr, bit write, string opc> {
+// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
+// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
- def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
- "\t[$base, $imm]", []> {
+ def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
- let Inst{23} = 1; // U = 1
let Inst{22} = 0;
let Inst{21} = write;
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
+
+ bits<17> addr;
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{23} = addr{12}; // U
+ let Inst{11-0} = addr{11-0}; // imm12
}
- def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
- "\t[$base, $imm]", []> {
+ def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // U = 0
@@ -1177,22 +1557,15 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-8} = 0b1100;
- }
- def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
- "\t[pc, $imm]", []> {
- let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
- let Inst{23} = ?; // add = (U == 1)
- let Inst{22} = 0;
- let Inst{21} = write;
- let Inst{20} = 1;
- let Inst{19-16} = 0b1111; // Rn = 0b1111
- let Inst{15-12} = 0b1111;
+ bits<13> addr;
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{7-0} = addr{7-0}; // imm8
}
- def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc,
- "\t[$base, $a]", []> {
+ def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
+ "\t$addr",
+ [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{23} = 0; // add = TRUE for T1
@@ -1201,133 +1574,174 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-6} = 0000000;
- let Inst{5-4} = 0b00; // no shift is applied
- }
- def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc,
- "\t[$base, $a, lsl $shamt]", []> {
- let Inst{31-25} = 0b1111100;
- let Inst{24} = instr;
- let Inst{23} = 0; // add = TRUE for T1
- let Inst{22} = 0;
- let Inst{21} = write;
- let Inst{20} = 1;
- let Inst{15-12} = 0b1111;
- let Inst{11-6} = 0000000;
+ bits<10> addr;
+ let Inst{19-16} = addr{9-6}; // Rn
+ let Inst{3-0} = addr{5-2}; // Rm
+ let Inst{5-4} = addr{1-0}; // imm2
}
}
-defm t2PLD : T2Ipl<0, 0, "pld">;
-defm t2PLDW : T2Ipl<0, 1, "pldw">;
-defm t2PLI : T2Ipl<1, 0, "pli">;
+defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
+defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
+defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
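The three defm lines above feed different (write, instr) pairs into the rewritten T2Ipl multiclass, and those bits end up in Inst{21} and Inst{24}. A tiny C++ sketch of the resulting selection, using only the pairs visible in the defm lines:

    #include <string>

    // Mnemonic implied by the (write, instr) parameters of T2Ipl above.
    std::string preloadMnemonic(bool write, bool instr) {
      if (instr)
        return "pli";                 // t2PLI  : T2Ipl<0, 1, "pli">
      return write ? "pldw" : "pld";  // t2PLDW : T2Ipl<1, 0>, t2PLD : T2Ipl<0, 0>
    }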
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
- "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
- let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
- let Inst{22} = 0;
- let Inst{21} = 0; // The W bit.
- let Inst{20} = 1; // Load
-}
+multiclass thumb2_ldst_mult<string asm, InstrItinClass itin,
+ InstrItinClass itin_upd, bit L_bit> {
+ def IA :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
-def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
- "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
- "$addr.addr = $wb", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
- let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
- let Inst{22} = 0;
- let Inst{21} = 1; // The W bit.
- let Inst{20} = 1; // Load
-}
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def IA_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops), IIC_iStorem,
- "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
- let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
- let Inst{22} = 0;
- let Inst{21} = 0; // The W bit.
- let Inst{20} = 0; // Store
-}
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB :
+ T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> {
+ bits<4> Rn;
+ bits<16> regs;
-def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops),
- IIC_iStorem,
- "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
- "$addr.addr = $wb", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
- let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
- let Inst{22} = 0;
- let Inst{21} = 1; // The W bit.
- let Inst{20} = 0; // Store
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
+ def DB_UPD :
+ T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+ itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b00;
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
+ }
}
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+
+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
+
+} // neverHasSideEffects
+
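A short C++ sketch of the fixed fields the new thumb2_ldst_mult multiclass assigns to its four variants, taken directly from the let Inst{...} lines above (0b01 = increment-after, 0b10 = decrement-before, bit 21 = writeback, bit 20 = the L_bit that distinguishes ldm from stm); Rn and the register list come from the operands and are omitted here:

    #include <cstdint>

    // Fixed encoding bits chosen by thumb2_ldst_mult above.
    uint32_t ldstMultFixedBits(bool decrementBefore, bool writeback, bool isLoad) {
      uint32_t inst = 0;
      inst |= 0b11101u << 27;                           // Inst{31-27}
      inst |= (decrementBefore ? 0b10u : 0b01u) << 23;  // Inst{24-23}: IA / DB
      inst |= uint32_t(writeback) << 21;                // Inst{21}: writeback
      inst |= uint32_t(isLoad) << 20;                   // Inst{20}: L_bit
      return inst;
    }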
//===----------------------------------------------------------------------===//
// Move Instructions.
//
let neverHasSideEffects = 1 in
-def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- "mov", ".w\t$dst, $src", []> {
+def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+ "mov", ".w\t$Rd, $Rm", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
- "mov", ".w\t$dst, $src",
- [(set rGPR:$dst, t2_so_imm:$src)]> {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
+ AddedComplexity = 1 in
+def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
+ "mov", ".w\t$Rd, $imm",
+ [(set rGPR:$Rd, t2_so_imm:$imm)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "movw", "\t$dst, $src",
- [(set rGPR:$dst, imm0_65535:$src)]> {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
+ "movw", "\t$Rd, $imm",
+ [(set rGPR:$Rd, imm0_65535:$imm)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
}
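The 16-bit movw immediate is scattered across four fields (imm{15-12} -> Inst{19-16}, imm{11} -> Inst{26}, imm{10-8} -> Inst{14-12}, imm{7-0} -> Inst{7-0}); t2MOVTi16 below reuses the same split. A minimal C++ sketch with a worked value (0xABCD gives imm4 = 0xA, i = 1, imm3 = 0b011, imm8 = 0xCD); the helper name is made up for illustration:

    #include <cstdint>

    // Scatter a movw/movt immediate exactly as the 'let Inst' lines above do.
    uint32_t scatterMovImm16(uint32_t inst, uint16_t imm) {
      inst |= ((imm >> 12) & 0xFu) << 16;  // imm4 -> Inst{19-16}
      inst |= ((imm >> 11) & 0x1u) << 26;  // i    -> Inst{26}
      inst |= ((imm >> 8)  & 0x7u) << 12;  // imm3 -> Inst{14-12}
      inst |= imm & 0xFFu;                 // imm8 -> Inst{7-0}
      return inst;
    }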
-let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
- "movt", "\t$dst, $imm",
- [(set rGPR:$dst,
+def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
+let Constraints = "$src = $Rd" in {
+def t2MOVTi16 : T2I<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi,
+ "movt", "\t$Rd, $imm",
+ [(set rGPR:$Rd,
(or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
}
+def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+} // Constraints
+
def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
//===----------------------------------------------------------------------===//
@@ -1336,28 +1750,28 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
+defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
+defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
-defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah",
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
// TODO: SXT(A){B|H}16 - done for disassembly only
// Zero extenders
let AddedComplexity = 16 in {
-defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
+defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
+defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -1365,15 +1779,17 @@ defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
// instead so we can include a check for masking back in the upper
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
-// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+// (t2UXTB16r_rot rGPR:$Src, 24)>,
+// Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+ (t2UXTB16r_rot rGPR:$Src, 8)>,
+ Requires<[HasT2ExtractPack, IsThumb2]>;
-defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah",
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1387,8 +1803,10 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
@@ -1436,8 +1854,8 @@ def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
// Select Bytes -- for disassembly only
-def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
- "\t$dst, $a, $b", []> {
+def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
let Inst{23} = 0b1;
@@ -1450,28 +1868,41 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
- list<dag> pat = [/* For disassembly only; pattern left blank */]>
- : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
- "\t$dst, $a, $b", pat> {
+ list<dag> pat = [/* For disassembly only; pattern left blank */],
+ dag iops = (ins rGPR:$Rn, rGPR:$Rm),
+ string asm = "\t$Rd, $Rn, $Rm">
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
let Inst{15-12} = 0b1111;
let Inst{7-4} = op7_4;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
}
// Saturating add/subtract -- for disassembly only
def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
- [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
+ [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
-def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
-def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
- [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
+ [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
@@ -1511,21 +1942,61 @@ def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
+class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2FourReg<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b),
- NoItinerary, "usad8", "\t$dst, $a, $b", []> {
+def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> {
let Inst{15-12} = 0b1111;
}
-def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
- "\t$dst, $a, $b, $acc", []>;
+def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>;
// Signed/Unsigned saturate -- for disassembly only
-def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+class T2SatI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> sat_imm;
+ bits<7> sh;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{4-0} = sat_imm{4-0};
+ let Inst{21} = sh{6};
+ let Inst{14-12} = sh{4-2};
+ let Inst{7-6} = sh{1-0};
+}
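A small C++ sketch of the operand scattering the new T2SatI class performs, using only the mappings written above (Rd to bits 11-8, Rn to bits 19-16, sat_imm to bits 4-0, and the 7-bit sh operand split as sh{6} -> bit 21, sh{4-2} -> bits 14-12, sh{1-0} -> bits 7-6); the function is an illustration, not the real encoder:

    #include <cstdint>

    // Mirrors the T2SatI 'let Inst{...}' statements above.
    uint32_t packT2SatIFields(uint32_t inst, unsigned rd, unsigned rn,
                              unsigned satImm, unsigned sh) {
      inst |= (rd & 0xFu) << 8;          // Inst{11-8}  = Rd
      inst |= (rn & 0xFu) << 16;         // Inst{19-16} = Rn
      inst |= satImm & 0x1Fu;            // Inst{4-0}   = sat_imm
      inst |= ((sh >> 6) & 0x1u) << 21;  // Inst{21}    = sh{6}
      inst |= ((sh >> 2) & 0x7u) << 12;  // Inst{14-12} = sh{4-2}
      inst |= (sh & 0x3u) << 6;          // Inst{7-6}   = sh{1-0}
      return inst;
    }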
+
+def t2SSAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
@@ -1533,8 +2004,9 @@ def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
let Inst{15} = 0;
}
-def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
- "ssat16", "\t$dst, $bit_pos, $a",
+def t2SSAT16: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "ssat16", "\t$Rd, $sat_imm, $Rn",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
@@ -1545,8 +2017,9 @@ def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+def t2USAT: T2SatI<
+ (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh),
+ NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
@@ -1554,8 +2027,9 @@ def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
let Inst{15} = 0;
}
-def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
- "usat16", "\t$dst, $bit_pos, $a",
+def t2USAT16: T2SatI<
+ (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
+ "usat16", "\t$dst, $sat_imm, $Rn",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
@@ -1579,23 +2053,23 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
- "rrx", "\t$dst, $src",
- [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
+def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "rrx", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
- let Inst{20} = ?; // The S bit.
let Inst{19-16} = 0b1111; // Rn
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0011;
}
}
-let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
- "lsrs", ".w\t$dst, $src, #1",
- [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def t2MOVsrl_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "lsrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1606,9 +2080,10 @@ def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
- "asrs", ".w\t$dst, $src, #1",
- [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
+def t2MOVsra_flag : T2TwoRegShiftImm<
+ (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
+ "asrs", ".w\t$Rd, $Rm, #1",
+ [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1626,39 +2101,67 @@ def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
//
defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
- BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+class T2BitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<5> msb;
+ bits<5> lsb;
+
+ let Inst{11-8} = Rd;
+ let Inst{4-0} = msb{4-0};
+ let Inst{14-12} = lsb{4-2};
+ let Inst{7-6} = lsb{1-0};
+}
+
+class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2BitFI<oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rn;
-let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
- IIC_iUNAsi, "bfc", "\t$dst, $imm",
- [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ let Inst{19-16} = Rn;
+}
+
+let Constraints = "$src = $Rd" in
+def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
+ IIC_iUNAsi, "bfc", "\t$Rd, $imm",
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
}
-def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
+def t2SBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10100;
let Inst{15} = 0;
}
-def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
+def t2UBFX: T2TwoRegBitFI<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb),
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b11100;
@@ -1666,24 +2169,50 @@ def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-let Constraints = "$src = $dst" in
-def t2BFI : T2I<(outs rGPR:$dst),
- (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
- IIC_iALUi, "bfi", "\t$dst, $val, $imm",
- [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
- bf_inv_mask_imm:$imm))]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-20} = 0b10110;
- let Inst{15} = 0;
+let Constraints = "$src = $Rd" in {
+ def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
+ [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
+ bf_inv_mask_imm:$imm))]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+
+ bits<10> imm;
+ let msb{4-0} = imm{9-5};
+ let lsb{4-0} = imm{4-0};
+ }
+
+ // GNU as only supports this form of bfi (w/ 4 arguments)
+ let isAsmParserOnly = 1 in
+ def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd),
+ (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit,
+ width_imm:$width),
+ IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
+ []> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-20} = 0b10110;
+ let Inst{15} = 0;
+
+ bits<5> lsbit;
+ bits<5> width;
+ let msb{4-0} = width; // Custom encoder => lsb+width-1
+ let lsb{4-0} = lsbit;
+ }
}
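The comment on t2BFI4p says the custom encoder turns the GNU-as style (lsb, width) pair into the msb field, i.e. msb = lsb + width - 1, with lsb split into imm3:imm2 exactly as the T2BitFI class above lays out. A small C++ sketch of that arithmetic under those assumptions (not LLVM's actual encoder); for example, bfi r0, r1, #8, #4 gives lsb = 8, width = 4, msb = 11:

    #include <cassert>
    #include <cstdint>

    // Convert the 4-operand bfi form (lsb, width) into the encoded fields.
    uint32_t encodeBfiFields(uint32_t inst, unsigned lsb, unsigned width) {
      assert(width >= 1 && lsb + width <= 32);
      unsigned msb = lsb + width - 1;
      inst |= msb & 0x1Fu;               // Inst{4-0}   = msb
      inst |= ((lsb >> 2) & 0x7u) << 12; // Inst{14-12} = lsb{4-2} (imm3)
      inst |= (lsb & 0x3u) << 6;         // Inst{7-6}  = lsb{1-0} (imm2)
      return inst;
    }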
-defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>, 0, "">;
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
// Preferred over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
-defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
let AddedComplexity = 1 in
@@ -1702,9 +2231,9 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- "mul", "\t$dst, $a, $b",
- [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
+def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "mul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1712,83 +2241,63 @@ def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "mla", "\t$dst, $a, $b, $c",
- [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
+def t2MLA: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "mls", "\t$dst, $a, $b, $c",
- [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
+def t2MLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "mls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0001; // Multiply and Subtract
}
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
- (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
- "smull", "\t$ldst, $hdst, $a, $b", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0111;
- let Inst{22-20} = 0b000;
- let Inst{7-4} = 0b0000;
-}
-
-def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
- (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
- "umull", "\t$ldst, $hdst, $a, $b", []> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0111;
- let Inst{22-20} = 0b010;
- let Inst{7-4} = 0b0000;
-}
+def t2SMULL : T2MulLong<0b000, 0b0000,
+ (outs rGPR:$Rd, rGPR:$Ra),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "smull", "\t$Rd, $Ra, $Rn, $Rm", []>;
+
+def t2UMULL : T2MulLong<0b010, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
- (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
- "smlal", "\t$ldst, $hdst, $a, $b", []>{
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0111;
- let Inst{22-20} = 0b100;
- let Inst{7-4} = 0b0000;
-}
-
-def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
- (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
- "umlal", "\t$ldst, $hdst, $a, $b", []>{
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0111;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0000;
-}
-
-def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
- (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
- "umaal", "\t$ldst, $hdst, $a, $b", []>{
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0111;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0110;
-}
+def t2SMLAL : T2MulLong<0b100, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMLAL : T2MulLong<0b110, 0b0000,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def t2UMAAL : T2MulLong<0b110, 0b0110,
+ (outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
} // neverHasSideEffects
// Rounding variants of the below are included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- "smmul", "\t$dst, $a, $b",
- [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
+def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmul", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1796,8 +2305,8 @@ def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- "smmulr", "\t$dst, $a, $b", []> {
+def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ "smmulr", "\t$Rd, $Rn, $Rm", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1805,49 +2314,49 @@ def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "smmla", "\t$dst, $a, $b, $c",
- [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
+def t2SMMLA : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "smmlar", "\t$dst, $a, $b, $c", []> {
+def t2SMMLAR: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "smmls", "\t$dst, $a, $b, $c",
- [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
+def t2SMMLS: T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
- "smmlsr", "\t$dst, $a, $b, $c", []> {
+def t2SMMLSR:T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- !strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
- (sext_inreg rGPR:$b, i16)))]> {
+ def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1856,10 +2365,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- !strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
- (sra rGPR:$b, (i32 16))))]> {
+ def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1868,10 +2377,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- !strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
- (sext_inreg rGPR:$b, i16)))]> {
+ def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1880,10 +2389,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
- !strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
- (sra rGPR:$b, (i32 16))))]> {
+ def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1892,10 +2401,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
- !strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (sra (opnode rGPR:$a,
- (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
+ def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1904,10 +2413,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
- !strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set rGPR:$dst, (sra (opnode rGPR:$a,
- (sra rGPR:$b, (i32 16))), (i32 16)))]> {
+ def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1919,75 +2428,75 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc,
- (opnode (sext_inreg rGPR:$a, i16),
- (sext_inreg rGPR:$b, i16))))]> {
+ def BB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (opnode (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
- (sra rGPR:$b, (i32 16)))))]> {
+ def BT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
- (sext_inreg rGPR:$b, i16))))]> {
+ def TB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
- (sra rGPR:$b, (i32 16)))))]> {
+ def TT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
- (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
+ def WB : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
- !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
- (sra rGPR:$b, (i32 16))), (i32 16))))]> {
+ def WT : T2FourReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
+ !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
- let Inst{15-12} = {?, ?, ?, ?}; // Ra
let Inst{7-6} = 0b00;
let Inst{5-4} = 0b01;
}
@@ -1997,62 +2506,68 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2ThreeReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2ThreeReg_mac<
+ 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2ThreeReg_mac<
+ 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
- "\t$dst, $a, $b, $acc", []>;
-def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
- "\t$dst, $a, $b, $acc", []>;
-def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
- "\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
- (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
- "\t$dst, $a, $b, $acc", []>;
-def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
- "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
- "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
- "\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
- "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLAD : T2ThreeReg_mac<
+ 0, 0b010, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLADX : T2FourReg_mac<
+ 0, 0b010, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
+ "\t$Rd, $Rn, $Rm, $Ra", []>;
+def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
+def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
+ (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
+ "\t$Ra, $Rd, $Rm, $Rn", []>;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -2060,99 +2575,117 @@ def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2ThreeReg<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-22} = 0b01010;
let Inst{21-20} = op1;
let Inst{15-12} = 0b1111;
let Inst{7-6} = 0b10;
let Inst{5-4} = op2;
+ let Rn{3-0} = Rm;
}
-def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>;
-def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "rbit", "\t$dst, $src",
- [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rbit", "\t$Rd, $Rm",
+ [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>;
-def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>;
-def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "rev16", ".w\t$dst, $src",
- [(set rGPR:$dst,
- (or (and (srl rGPR:$src, (i32 8)), 0xFF),
- (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
- (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
- (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "rev16", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd,
+ (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
-def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "revsh", ".w\t$dst, $src",
- [(set rGPR:$dst,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
+ "revsh", ".w\t$Rd, $Rm",
+ [(set rGPR:$Rd,
(sext_inreg
- (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
- (shl rGPR:$src, (i32 8))), i16))]>;
-
-def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
- [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
- (and (shl rGPR:$src2, lsl_amt:$sh),
+ (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+ (shl rGPR:$Rm, (i32 8))), i16))]>;
+
+def t2PKHBT : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
+ (and (shl rGPR:$Rm, lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack]> {
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
let Inst{5} = 0; // BT form
let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
}
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
- Requires<[HasT2ExtractPack]>;
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
(t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
- Requires<[HasT2ExtractPack]>;
+ Requires<[HasT2ExtractPack, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
-def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
- [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
- (and (sra rGPR:$src2, asr_amt:$sh),
+def t2PKHTB : T2ThreeReg<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
+ (and (sra rGPR:$Rm, asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack]> {
+ Requires<[HasT2ExtractPack, IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
let Inst{5} = 1; // TB form
let Inst{4} = 0;
+
+ bits<8> sh;
+ let Inst{14-12} = sh{7-5};
+ let Inst{7-6} = sh{4-3};
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
- Requires<[HasT2ExtractPack]>;
+ Requires<[HasT2ExtractPack, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
(t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
- Requires<[HasT2ExtractPack]>;
+ Requires<[HasT2ExtractPack, IsThumb2]>;
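
The PKHBT/PKHTB selection patterns above amount to simple halfword packing. As a rough
sketch of that dataflow (the helper names and the plain-Python bit handling are
illustrative only, not part of the patch):

def pkhbt(rn, rm, sh):
    # Rd = low halfword of Rn combined with (Rm << sh) masked to the high halfword,
    # mirroring (or (and Rn, 0xFFFF), (and (shl Rm, sh), 0xFFFF0000)) above.
    return (rn & 0xFFFF) | ((rm << sh) & 0xFFFF0000)

def pkhtb(rn, rm, sh):
    # Rd = high halfword of Rn combined with an arithmetic right shift of Rm,
    # masked to the low halfword (sh = 0 is PKHBT instead, as noted above).
    if rm & 0x80000000:          # treat rm as a signed 32-bit value for asr
        rm -= 1 << 32
    return (rn & 0xFFFF0000) | ((rm >> sh) & 0xFFFF)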
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
- BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
+
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm),
+ (t2CMPri GPR:$lhs, t2_so_imm:$imm)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs),
+ (t2CMPrr GPR:$lhs, rGPR:$rhs)>;
+def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs),
+ (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>;
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
// Compare-to-zero still works out, just not the relationals
//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
@@ -2162,18 +2695,21 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm),
(t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
- BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>;
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
- BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
+ BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
- "mov", ".w\t$dst, $true",
- [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst"> {
+def t2MOVCCr : T2TwoReg<
+ (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr,
+ "mov", ".w\t$Rd, $Rm",
+ [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2183,10 +2719,11 @@ def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
-def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
- IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst"> {
+let isMoveImm = 1 in
+def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mov", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -2195,9 +2732,49 @@ def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
let Inst{15} = 0;
}
+let isMoveImm = 1 in
+def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
+ IIC_iCMOVi,
+ "movw", "\t$Rd, $imm", []>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+
+ bits<4> Rd;
+ bits<16> imm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = imm{15-12};
+ let Inst{26} = imm{11};
+ let Inst{14-12} = imm{10-8};
+ let Inst{7-0} = imm{7-0};
+}
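
The 16-bit immediate of t2MOVCCi16 is scattered across the word exactly as the let Inst
assignments above spell out. A minimal sketch of that packing, with an illustrative
helper name:

def encode_movw_imm16(imm):
    # imm{15-12}->Inst{19-16}, imm{11}->Inst{26}, imm{10-8}->Inst{14-12},
    # imm{7-0}->Inst{7-0}, per the operand scattering above.
    assert 0 <= imm <= 0xFFFF
    return (((imm >> 12) & 0xF) << 16) | (((imm >> 11) & 0x1) << 26) | \
           (((imm >> 8) & 0x7) << 12) | (imm & 0xFF)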
+
+let isMoveImm = 1 in
+def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, pred:$p),
+ IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+
+let isMoveImm = 1 in
+def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
+ IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
+ imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 0;
+ let Inst{24-21} = 0b0011;
+ let Inst{20} = 0; // The S bit.
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0;
+}
+
class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2205,22 +2782,22 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod; // Shift type.
}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
- (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
- RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
- (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
- RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
- (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
- RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
- (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
- IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
- RegConstraint<"$false = $dst">;
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
+ IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
+ RegConstraint<"$false = $Rd">;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -2229,78 +2806,29 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
- [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
- let Inst{31-4} = 0xF3BF8F5;
- // FIXME: add support for options other than a full system DMB
- let Inst{3-0} = 0b1111;
-}
-
-def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
- [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
- let Inst{31-4} = 0xF3BF8F4;
- // FIXME: add support for options other than a full system DSB
- let Inst{3-0} = 0b1111;
-}
+def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f5;
+ let Inst{3-0} = opt;
}
-
-// Helper class for multiclass T2MemB -- for disassembly only
-class T2I_memb<string opc, string asm>
- : T2I<(outs), (ins), NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasV7]> {
- let Inst{31-20} = 0xf3b;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
}
-multiclass T2MemB<bits<4> op7_4, string opc> {
-
- def st : T2I_memb<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : T2I_memb<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : T2I_memb<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : T2I_memb<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : T2I_memb<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
-
- def osh : T2I_memb<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
-
- def oshst : T2I_memb<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary,
+ "dsb", "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb, HasDB]> {
+ bits<4> opt;
+ let Inst{31-4} = 0xf3bf8f4;
+ let Inst{3-0} = opt;
}
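
The memb_opt operand carried by the new t2DMB/t2DSB definitions replaces the per-option
instruction variants removed below; its 4-bit value goes straight into Inst{3-0}. A small
sketch of the option values, taken from the removed multiclass plus 0b1111 for the
full-system barrier (helper name is illustrative):

MEMB_OPT = {"sy": 0b1111, "st": 0b1110, "ish": 0b1011, "ishst": 0b1010,
            "nsh": 0b0111, "nshst": 0b0110, "osh": 0b0011, "oshst": 0b0010}

def encode_dmb(opt):
    # dmb <opt>: fixed bits 0xf3bf8f5 in Inst{31-4}, barrier option in Inst{3-0}.
    return (0xF3BF8F5 << 4) | MEMB_OPT[opt]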
-// These DMB variants are for disassembly only.
-defm t2DMB : T2MemB<0b0101, "dmb">;
-
-// These DSB variants are for disassembly only.
-defm t2DSB : T2MemB<0b0100, "dsb">;
-
// ISB has only full system option -- for disassembly only
-def t2ISBsy : T2I_memb<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = 0b1111;
}
@@ -2314,6 +2842,11 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Inst{7-6} = 0b01;
let Inst{5-4} = opcod;
let Inst{3-0} = 0b1111;
+
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
}
class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string opc, string asm, string cstr,
@@ -2324,60 +2857,88 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Inst{11-8} = rt2;
let Inst{7-6} = 0b01;
let Inst{5-4} = opcod;
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]",
"", []>;
-def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
Size4Bytes, NoItinerary,
- "ldrex", "\t$dest, [$ptr]", "",
+ "ldrex", "\t$Rt, [$Rn]", "",
[]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000101;
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
}
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
AddrModeNone, Size4Bytes, NoItinerary,
- "ldrexd", "\t$dest, $dest2, [$ptr]", "",
- [], {?, ?, ?, ?}>;
+ "ldrexd", "\t$Rt, $Rt2, [$Rn]", "",
+ [], {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
}
-let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
AddrModeNone, Size4Bytes, NoItinerary,
- "strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+ "strexb", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
AddrModeNone, Size4Bytes, NoItinerary,
- "strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
+ "strexh", "\t$Rd, $Rt, [$Rn]", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
AddrModeNone, Size4Bytes, NoItinerary,
- "strex", "\t$success, $src, [$ptr]", "",
+ "strex", "\t$Rd, $Rt, [$Rn]", "",
[]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
+
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rt;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rt;
}
-def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
- (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn),
AddrModeNone, Size4Bytes, NoItinerary,
- "strexd", "\t$success, $src, $src2, [$ptr]", "", [],
- {?, ?, ?, ?}>;
+ "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [],
+ {?, ?, ?, ?}> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
}
// Clear-Exclusive is for disassembly only.
-def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf3b;
+def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-16} = 0xf3bf;
let Inst{15-14} = 0b10;
+ let Inst{13} = 0;
let Inst{12} = 0;
+ let Inst{11-8} = 0b1111;
let Inst{7-4} = 0b0010;
+ let Inst{3-0} = 0b1111;
}
//===----------------------------------------------------------------------===//
@@ -2386,7 +2947,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
// __aeabi_read_tp preserves the registers r1-r3.
let isCall = 1,
- Defs = [R0, R12, LR, CPSR] in {
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
"bl\t__aeabi_read_tp",
[(set R0, ARMthread_pointer)]> {
@@ -2413,32 +2974,18 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ], hasSideEffects = 1, isBarrier = 1 in {
+ D31 ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
- hasSideEffects = 1, isBarrier = 1 in {
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2453,82 +3000,77 @@ let Defs =
// operand list.
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
- hasExtraDefRegAllocReq = 1 in
- def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_Br,
- "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
- "$addr.addr = $wb", []> {
+ hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ IIC_iLoad_mBr,
+ "ldmia${p}.w\t$Rn!, $regs",
+ "$Rn = $wb", []> {
+ bits<4> Rn;
+ bits<16> regs;
+
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
- let Inst{22} = 0;
- let Inst{21} = 1; // The W bit.
- let Inst{20} = 1; // Load
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = 0;
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = 1;
+ let Inst{19-16} = Rn;
+ let Inst{15-0} = regs;
}
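
The reglist operand of t2LDMIA_RET is encoded as the 16-bit mask placed in Inst{15-0}.
A sketch, assuming the usual one-bit-per-core-register convention (bit i set means ri is
in the list; that mapping is not spelled out in this file):

def encode_reglist(regs):
    # regs is an iterable of register numbers 0..15; build the Inst{15-0} mask.
    mask = 0
    for r in regs:
        assert 0 <= r <= 15
        mask |= 1 << r
    return mask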
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
-def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br,
+def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
"b.w\t$target",
[(br bb:$target)]> {
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 1;
+
+ bits<20> target;
+ let Inst{26} = target{19};
+ let Inst{11} = target{18};
+ let Inst{13} = target{17};
+ let Inst{21-16} = target{16-11};
+ let Inst{10-0} = target{10-0};
}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
-def t2BR_JT :
- T2JTI<(outs),
- (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target$jt",
- [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0100100;
- let Inst{19-16} = 0b1111;
- let Inst{14-12} = 0b000;
- let Inst{11-8} = 0b1111; // Rd = pc
- let Inst{7-4} = 0b0000;
-}
+def t2BR_JT : t2PseudoInst<(outs),
+ (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br,
+ [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
// FIXME: Add a non-pc based case that can be predicated.
-def t2TBB :
- T2JTI<(outs),
- (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb\t$index$jt", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0001101;
- let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
- let Inst{15-8} = 0b11110000;
- let Inst{7-4} = 0b0000; // B form
-}
-
-def t2TBH :
- T2JTI<(outs),
- (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh\t$index$jt", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0001101;
- let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
- let Inst{15-8} = 0b11110000;
- let Inst{7-4} = 0b0001; // H form
-}
-
-// Generic versions of the above two instructions, for disassembly only
-
-def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
- "tbb", "\t[$a, $b]", []>{
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0001101;
- let Inst{15-8} = 0b11110000;
- let Inst{7-4} = 0b0000; // B form
-}
-
-def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
- "tbh", "\t[$a, $b, lsl #1]", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0001101;
- let Inst{15-8} = 0b11110000;
- let Inst{7-4} = 0b0001; // H form
+def t2TBB_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br, []>;
+
+def t2TBH_JT : t2PseudoInst<(outs),
+ (ins GPR:$index, i32imm:$jt, i32imm:$id),
+ SizeSpecial, IIC_Br, []>;
+
+def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbb", "\t[$Rn, $Rm]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 0; // B form
+ let Inst{3-0} = Rm;
+}
+
+def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
+ "tbh", "\t[$Rn, $Rm, lsl #1]", []> {
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111010001101;
+ let Inst{19-16} = Rn;
+ let Inst{15-5} = 0b11110000000;
+ let Inst{4} = 1; // H form
+ let Inst{3-0} = Rm;
}
} // isNotDuplicable, isIndirectBranch
@@ -2543,6 +3085,16 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{31-27} = 0b11110;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
+
+ bits<4> p;
+ let Inst{25-22} = p;
+
+ bits<21> target;
+ let Inst{26} = target{20};
+ let Inst{11} = target{19};
+ let Inst{13} = target{18};
+ let Inst{21-16} = target{17-12};
+ let Inst{10-0} = target{11-1};
}
@@ -2554,6 +3106,11 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
+
+ bits<4> cc;
+ bits<4> mask;
+ let Inst{7-4} = cc;
+ let Inst{3-0} = mask;
}
// Branch and Exchange Jazelle -- for disassembly only
@@ -2565,22 +3122,44 @@ def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
let Inst{25-20} = 0b111100;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
+
+ bits<4> func;
+ let Inst{19-16} = func;
}
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode from Inst{4-0}
-// opt{5} = changemode from Inst{17}
-// opt{8-6} = AIF from Inst{8-6}
-// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def t2CPS : T2XI<(outs),(ins cps_opt:$opt), NoItinerary, "cps$opt",
- [/* For disassembly only; pattern left blank */]> {
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser currently has no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
+ !strconcat("cps", asm_op),
+ [/* For disassembly only; pattern left blank */]> {
+ bits<2> imod;
+ bits<3> iflags;
+ bits<5> mode;
+ bit M;
+
let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
+ let Inst{26} = 0;
let Inst{25-20} = 0b111010;
+ let Inst{19-16} = 0b1111;
let Inst{15-14} = 0b10;
- let Inst{12} = 0;
-}
+ let Inst{12} = 0;
+ let Inst{10-9} = imod;
+ let Inst{8} = M;
+ let Inst{7-5} = iflags;
+ let Inst{4-0} = mode;
+}
+
+let M = 1 in
+ def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+ "$imod.w\t$iflags, $mode">;
+let mode = 0, M = 0 in
+ def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
+ "$imod.w\t$iflags">;
+let imod = 0, iflags = 0, M = 1 in
+ def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">;
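
All three CPS flavors share the field packing defined by the t2CPS class; the defs only
differ in which fields are hard-wired. A sketch of that packing (illustrative helper
name):

def encode_cps_fields(imod, M, iflags, mode):
    # Inst{10-9}=imod, Inst{8}=M, Inst{7-5}=iflags, Inst{4-0}=mode, as in the class above.
    return ((imod & 0x3) << 9) | ((M & 0x1) << 8) | ((iflags & 0x7) << 5) | (mode & 0x1F)

# t2CPS3p supplies all four fields, t2CPS2p fixes mode=0 and M=0, and t2CPS1p fixes
# imod=0, iflags=0, M=1, matching the 'let' bindings on the three defs.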
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
@@ -2589,6 +3168,7 @@ class T2I_hint<bits<8> op7_0, string opc, string asm>
: T2I<(outs), (ins), NoItinerary, opc, asm,
[/* For disassembly only; pattern left blank */]> {
let Inst{31-20} = 0xf3a;
+ let Inst{19-16} = 0b1111;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
let Inst{10-8} = 0b000;
@@ -2608,6 +3188,9 @@ def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
let Inst{12} = 0;
let Inst{10-8} = 0b000;
let Inst{7-4} = 0b1111;
+
+ bits<4> opt;
+ let Inst{3-0} = opt;
}
// Secure Monitor Call is a system instruction -- for disassembly only
@@ -2617,83 +3200,86 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
-}
-// Store Return State is a system instruction -- for disassembly only
-def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000010; // W = 1
+ bits<4> opt;
+ let Inst{19-16} = opt;
}
-def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000000; // W = 0
-}
+class T2SRS<bits<12> op31_20,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
-def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0011010; // W = 1
+ bits<5> mode;
+ let Inst{4-0} = mode{4-0};
}
-def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0011000; // W = 0
-}
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2SRS<0b111010000010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSDB : T2SRS<0b111010000000,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIAW : T2SRS<0b111010011010,
+ (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SRSIA : T2SRS<0b111010011000,
+ (outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]>;
// Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000011; // W = 1
-}
-def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0000001; // W = 0
-}
+class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
-def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0011011; // W = 1
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
}
-def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-20} = 0b0011001; // W = 0
-}
+def t2RFEDBW : T2RFE<0b111010000011,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEDB : T2RFE<0b111010000001,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIAW : T2RFE<0b111010011011,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ [/* For disassembly only; pattern left blank */]>;
+def t2RFEIA : T2RFE<0b111010011001,
+ (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ [/* For disassembly only; pattern left blank */]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
-// Two piece so_imms.
-def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
- (t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
- (t2_so_neg_imm2part_2 imm:$RHS))>;
-
// 32-bit immediate using movw + movt.
-// This is a single pseudo instruction to make it re-materializable. Remove
-// when we can do generalized remat.
-let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set rGPR:$dst, (i32 imm:$src))]>;
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
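
When the pseudo is expanded it becomes the movw/movt pair the removed inline asm spelled
out, with the constant split into 16-bit halves. A minimal sketch of that split (helper
name is illustrative):

def split_imm32(value):
    # movw writes the low 16 bits, movt the high 16 bits (${src:lo16} / ${src:hi16} above).
    value &= 0xFFFFFFFF
    return value & 0xFFFF, value >> 16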
+
+// Pseudo instruction that combines movw + movt + add pc (if pic).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+
+def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsThumb2, UseMovt]>;
+}
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2709,10 +3295,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// be expanded into two instructions late to allow if-conversion and
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary,
- "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
- [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
@@ -2720,48 +3305,128 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
// Move between special register and ARM core register -- for disassembly only
//
-// Rd = Instr{11-8}
-def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-21} = 0b11111;
- let Inst{20} = 0; // The R bit.
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-20} = op31_20{11-0};
+ let Inst{15-14} = op15_14{1-0};
+ let Inst{12} = op12{0};
}
-// Rd = Instr{11-8}
-def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-21} = 0b11111;
- let Inst{20} = 1; // The R bit.
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = 0b1111;
}
-// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
- "\tcpsr$mask, $src",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-21} = 0b11100;
- let Inst{20} = 0; // The R bit.
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+def t2MRS : T2MRS<0b111100111110, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr",
+ [/* For disassembly only; pattern left blank */]>;
+def t2MRSsys : T2MRS<0b111100111111, 0b10, 0,
+ (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Move from ARM core register to Special Register
+//
+// No need to have both system and application versions; the encodings are the
+// same and the assembly parser has no way to distinguish between them. The mask
+// operand contains the special register (R bit) in bit 4, and bits 3-0 contain
+// the mask of the fields to be accessed in the special register.
+def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
+ 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn),
+ NoItinerary, "msr", "\t$mask, $Rn",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<5> mask;
+ bits<4> Rn;
+ let Inst{19-16} = Rn;
+ let Inst{20} = mask{4}; // R Bit
+ let Inst{13} = 0b0;
+ let Inst{11-8} = mask{3-0};
}
-// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
- "\tspsr$mask, $src",
+//===----------------------------------------------------------------------===//
+// Move between coprocessor and ARM core register -- for disassembly only
+//
+
+class t2MovRCopro<string opc, bit direction>
+ : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1110;
+ let Inst{20} = direction;
+ let Inst{4} = 1;
+
+ bits<4> Rt;
+ bits<4> cop;
+ bits<3> opc1;
+ bits<3> opc2;
+ bits<4> CRm;
+ bits<4> CRn;
+
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = cop;
+ let Inst{23-21} = opc1;
+ let Inst{7-5} = opc2;
+ let Inst{3-0} = CRm;
+ let Inst{19-16} = CRn;
+}
+
+def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>;
+def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>;
+
+class t2MovRRCopro<string opc, bit direction>
+ : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"),
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-24} = 0b1100;
+ let Inst{23-21} = 0b010;
+ let Inst{20} = direction;
+
+ bits<4> Rt;
+ bits<4> Rt2;
+ bits<4> cop;
+ bits<4> opc1;
+ bits<4> CRm;
+
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+ let Inst{11-8} = cop;
+ let Inst{7-4} = opc1;
+ let Inst{3-0} = CRm;
+}
+
+def t2MCRR2 : t2MovRRCopro<"mcrr2",
+ 0 /* from ARM core register to coprocessor */>;
+def t2MRRC2 : t2MovRRCopro<"mrrc2",
+ 1 /* from coprocessor to ARM core register */>;
+
+//===----------------------------------------------------------------------===//
+// Other Coprocessor Instructions. For disassembly only.
+//
+
+def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-21} = 0b11100;
- let Inst{20} = 1; // The R bit.
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index c29e096..920c5c9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,30 +11,26 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI :
-SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF :
-SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
-def SDT_CMPFP0 :
-SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_VMOVDRR :
-SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
- SDTCisSameAs<1, 2>]>;
-
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
-def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
-def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
-
def vfp_f32imm : Operand<f32>,
PatLeaf<(f32 fpimm), [{
return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
@@ -55,86 +51,136 @@ def vfp_f64imm : Operand<f64>,
//
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
- [(set DPR:$dst, (f64 (load addrmode5:$addr)))]>;
-def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
- [(set SPR:$dst, (load addrmode5:$addr))]>;
-} // canFoldAsLoad
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
-def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- IIC_fpStore64, "vstr", ".64\t$src, $addr",
- [(store (f64 DPR:$src), addrmode5:$addr)]>;
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- IIC_fpStore32, "vstr", ".32\t$src, $addr",
- [(store SPR:$src, addrmode5:$addr)]>;
+} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
-//===----------------------------------------------------------------------===//
-// Load / store multiple Instructions.
-//
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
- variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
- let Inst{20} = 1;
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ [(store SPR:$Sd, addrmode5:$addr)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
-def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
- variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
- let Inst{20} = 1;
-}
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
-def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t$addr!, $dsts",
- "$addr.addr = $wb", []> {
- let Inst{20} = 1;
+multiclass vfp_ldst_mult<string asm, bit L_bit,
+ InstrItinClass itin, InstrItinClass itin_upd> {
+ // Double Precision
+ def DIA :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DIA_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB :
+ AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+ }
+ def DDB_UPD :
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+ }
+
+ // Single Precision
+ def SIA :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SIA_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b01; // Increment After
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB :
+ AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeNone, itin,
+ !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 0; // No writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
+ def SDB_UPD :
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ IndexModeUpd, itin_upd,
+ !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{21} = 1; // Writeback
+ let Inst{20} = L_bit;
+
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+ let D = VFPNeonDomain;
+ }
}
-def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t$addr!, $dsts",
- "$addr.addr = $wb", []> {
- let Inst{20} = 1;
-}
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
+let neverHasSideEffects = 1 in {
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
- variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
- let Inst{20} = 0;
-}
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;
-def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
- variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
- let Inst{20} = 0;
-}
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
-def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t$addr!, $srcs",
- "$addr.addr = $wb", []> {
- let Inst{20} = 0;
-}
+} // neverHasSideEffects
-def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t$addr!, $srcs",
- "$addr.addr = $wb", []> {
- let Inst{20} = 0;
-}
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+def : MnemonicAlias<"vldm", "vldmia">;
+def : MnemonicAlias<"vstm", "vstmia">;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -142,56 +188,71 @@ def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
// FP Binary Operations.
//
-def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd DPR:$a, (f64 DPR:$b)))]>;
-
-def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
-
-// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
-def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
- [(arm_cmpfp DPR:$a, (f64 DPR:$b))]>;
-
-def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "vcmp", ".f64\t$a, $b",
- [/* For disassembly only; pattern left blank */]>;
-
-def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
- [(arm_cmpfp SPR:$a, SPR:$b)]>;
-
-def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "vcmp", ".f32\t$a, $b",
- [/* For disassembly only; pattern left blank */]>;
+def VADDD : ADbI<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
-def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fdiv DPR:$a, (f64 DPR:$b)))]>;
-
-def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-
-def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fmul DPR:$a, (f64 DPR:$b)))]>;
-
-def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+def VSUBD : ADbI<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fneg (fmul DPR:$a, (f64 DPR:$b))))]>;
+def VDIVD : ADbI<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+def VNMULD : ADbI<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -199,53 +260,128 @@ def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub DPR:$a, (f64 DPR:$b)))]>;
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
-def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
//
-def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
- [(set DPR:$dst, (fabs (f64 DPR:$a)))]>;
-
-def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
- [(set SPR:$dst, (fabs SPR:$a))]>;
+def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
let Defs = [FPSCR] in {
-def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a),
- IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
- [(arm_cmpfp0 (f64 DPR:$a))]>;
+def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
-def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a),
- IIC_fpCMP64, "vcmp", ".f64\t$a, #0",
- [/* For disassembly only; pattern left blank */]>;
+def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
-def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a),
- IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
- [(arm_cmpfp0 SPR:$a)]>;
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a),
- IIC_fpCMP32, "vcmp", ".f32\t$a, #0",
- [/* For disassembly only; pattern left blank */]>;
+// FIXME: Verify encoding after integrated assembler is working.
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
}
-def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
- [(set DPR:$dst, (fextend SPR:$a))]>;
+def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+} // Defs = [FPSCR]
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
// Special case encoding: bits 11-8 are 0b1011.
-def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
- [(set SPR:$dst, (fround DPR:$a))]> {
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
let Inst{11-8} = 0b1011;
@@ -255,6 +391,7 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
+// FIXME: Verify encoding after integrated assembler is working.
def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
@@ -277,47 +414,94 @@ def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
-let neverHasSideEffects = 1 in {
-def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
-
-def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
-} // neverHasSideEffects
+def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
- [(set DPR:$dst, (fneg (f64 DPR:$a)))]>;
+def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
-def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
- [(set SPR:$dst, (fneg SPR:$a))]>;
+def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
-def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
- [(set DPR:$dst, (fsqrt (f64 DPR:$a)))]>;
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
-def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
- [(set SPR:$dst, (fsqrt SPR:$a))]>;
+def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
-def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_fpMOVSI, "vmov", "\t$dst, $src",
- [(set GPR:$dst, (bitconvert SPR:$src))]>;
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
-def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_fpMOVIS, "vmov", "\t$dst, $src",
- [(set SPR:$dst, (bitconvert GPR:$src))]>;
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
let neverHasSideEffects = 1 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
- (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
[/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
let Inst{7-6} = 0b00;
}
@@ -333,10 +517,21 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
// FMDLR: GPR -> SPR
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
- (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
- [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
- let Inst{7-6} = 0b00;
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
}
let neverHasSideEffects = 1 in
@@ -350,102 +545,183 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
-// FMRX : SPR system reg -> GPR
-
+// FMRX: SPR system reg -> GPR
// FMSRR: GPR -> SPR
+// FMXR: GPR -> VFP system reg
+
+
+// Int -> FP:
+
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
-// FMXR: GPR -> VFP Sstem reg
-
-
-// Int to FP:
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
-def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
- (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
- [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> {
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
let Inst{7} = 1; // s32
}
-def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
- (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
- [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd),(ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
let Inst{7} = 1; // s32
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
-def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
- (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
- [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> {
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
let Inst{7} = 0; // u32
}
-def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
- [(set SPR:$dst, (arm_uitof SPR:$a))]> {
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
let Inst{7} = 0; // u32
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
-// FP to Int:
-// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
-def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
- [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> {
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
let Inst{7} = 1; // Z bit
}
-def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
- [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
-def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
- [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> {
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
let Inst{7} = 1; // Z bit
}
-def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
- [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
-// For disassembly only.
let Uses = [FPSCR] in {
-def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]> {
let Inst{7} = 0; // Z bit
}
-def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
let Inst{7} = 0; // Z bit
}
-def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (int_arm_vcvtru (f64 DPR:$Dm)))]> {
let Inst{7} = 0; // Z bit
}
-def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
let Inst{7} = 0; // Z bit
}
}
@@ -457,30 +733,47 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
// S32 (U=0, sx=1) -> SL
// U32 (U=1, sx=1) -> UL
-let Constraints = "$a = $dst" in {
+// FIXME: Marking these as codegen only seems wrong. They are real
+// instructions(?)
+let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
// FP to Fixed-Point:
-let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
@@ -501,30 +794,44 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
-}
// Fixed-Point to FP:
-let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
@@ -545,70 +852,120 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
-}
-} // End of 'let Constraints = "$src = $dst" in'
+} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
//===----------------------------------------------------------------------===//
// FP FMA Operations.
//
-def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
+def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
-def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+
+def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
-
-def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
- (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
-def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
- (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-
-def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
+def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP2,UseFPVMLx]>;
+
+def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,UseFPVMLx]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -616,92 +973,157 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
let neverHasSideEffects = 1 in {
def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
- [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
- [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
- [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
- [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd"> {
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
+}
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
-// Misc.
+// Move from VFP System Register to ARM core register.
//
-// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
-// to APSR.
-let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\tapsr_nzcv, fpscr",
- [(arm_fmstat)]> {
+class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> Rt;
+
let Inst{27-20} = 0b11101111;
- let Inst{19-16} = 0b0001;
- let Inst{15-12} = 0b1111;
+ let Inst{19-16} = opc19_16;
+ let Inst{15-12} = Rt;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
}
-// FPSCR <-> GPR (for disassembly only)
+// APSR is the application level alias of CPSR. This instruction copies the
+// FPSCR N, Z, C, V flags to APSR.
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+
+// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
- "vmrs", "\t$dst, fpscr",
- [(set GPR:$dst, (int_arm_get_fpscr))]> {
- let Inst{27-20} = 0b11101111;
- let Inst{19-16} = 0b0001;
- let Inst{11-8} = 0b1010;
- let Inst{7} = 0;
- let Inst{4} = 1;
+def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]>;
+
+// System level FPEXC, FPSID -> GPR
+let Uses = [FPSCR] in {
+ def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpexc", []>;
+ def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpsid", []>;
}
-let Defs = [FPSCR] in
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
- "vmsr", "\tfpscr, $src",
- [(int_arm_set_fpscr GPR:$src)]> {
+//===----------------------------------------------------------------------===//
+// Move from ARM core register to VFP System Register.
+//
+
+class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern>:
+ VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
+
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
let Inst{27-20} = 0b11101110;
- let Inst{19-16} = 0b0001;
+ let Inst{19-16} = opc19_16;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
+let Defs = [FPSCR] in {
+ // Application level GPR -> FPSCR
+ def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>;
+ // System level GPR -> FPEXC
+ def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpexc, $src", []>;
+ // System level GPR -> FPSID
+ def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpsid, $src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
- "vmov", ".f64\t$dst, $imm",
- [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{19} = imm{31};
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 1;
+ let Inst{8} = 1; // Double precision.
let Inst{7-4} = 0b0000;
}
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
- VFPMiscFrm, IIC_fpUNA32,
- "vmov", ".f32\t$dst, $imm",
- [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{19} = imm{31}; // The immediate is handled as a double.
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 0;
+ let Inst{8} = 0; // Single precision.
let Inst{7-4} = 0b0000;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
index 5f6d7ee..45b7e48 100644
--- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
#include <cstdlib>
using namespace llvm;
@@ -43,7 +43,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
// CompilationCallback stub - We can't use a C function with inline assembly in
-// it, because we the prolog/epilog inserted by GCC won't work for us (we need
+// it, because the prolog/epilog inserted by GCC won't work for us. (We need
// to preserve more context and manipulate the stack directly). Instead,
// write our own wrapper, which does things our way, so we have complete
// control over register saving and restoring.
@@ -97,9 +97,10 @@ extern "C" {
"str r0, [sp,#16]\n"
// Return to the (newly modified) stub to invoke the real function.
// The above twiddling of the saved return addresses allows us to
- // deallocate everything, including the LR the stub saved, all in one
- // pop instruction.
- "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n"
+ // deallocate everything, including the LR the stub saved, with two
+ // updating load instructions.
+ "ldmia sp!, {r0, r1, r2, r3, lr}\n"
+ "ldr pc, [sp], #4\n"
);
#else // Not an ARM host
void ARMCompilationCallback() {
@@ -290,7 +291,7 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
*((intptr_t*)RelocPos) |= ResultPtr;
// Set register Rn to PC.
*((intptr_t*)RelocPos) |=
- ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_pic_jt:
diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
index f5d9eff..2f97928 100644
--- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h
@@ -105,7 +105,7 @@ namespace llvm {
/// model is PIC.
void Initialize(const MachineFunction &MF, bool isPIC) {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries());
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
IsPIC = isPIC;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 2b7645a..d9dc5cd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -128,45 +128,153 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
-static int getLoadStoreMultipleOpcode(int Opcode) {
+static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
- case ARM::LDR:
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDRi12:
++NumLDMGened;
- return ARM::LDM;
- case ARM::STR:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA;
+ case ARM_AM::da: return ARM::LDMDA;
+ case ARM_AM::db: return ARM::LDMDB;
+ case ARM_AM::ib: return ARM::LDMIB;
+ }
+ break;
+ case ARM::STRi12:
++NumSTMGened;
- return ARM::STM;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA;
+ case ARM_AM::da: return ARM::STMDA;
+ case ARM_AM::db: return ARM::STMDB;
+ case ARM_AM::ib: return ARM::STMIB;
+ }
+ break;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
- return ARM::t2LDM;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA;
+ case ARM_AM::db: return ARM::t2LDMDB;
+ }
+ break;
case ARM::t2STRi8:
case ARM::t2STRi12:
++NumSTMGened;
- return ARM::t2STM;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA;
+ case ARM_AM::db: return ARM::t2STMDB;
+ }
+ break;
case ARM::VLDRS:
++NumVLDMGened;
- return ARM::VLDMS;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA;
+ case ARM_AM::db: return ARM::VLDMSDB;
+ }
+ break;
case ARM::VSTRS:
++NumVSTMGened;
- return ARM::VSTMS;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA;
+ case ARM_AM::db: return ARM::VSTMSDB;
+ }
+ break;
case ARM::VLDRD:
++NumVLDMGened;
- return ARM::VLDMD;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA;
+ case ARM_AM::db: return ARM::VLDMDDB;
+ }
+ break;
case ARM::VSTRD:
++NumVSTMGened;
- return ARM::VSTMD;
- default: llvm_unreachable("Unhandled opcode!");
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA;
+ case ARM_AM::db: return ARM::VSTMDDB;
+ }
+ break;
}
+
return 0;
}
+namespace llvm {
+ namespace ARM_AM {
+
+AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA_RET:
+ case ARM::LDMIA:
+ case ARM::LDMIA_UPD:
+ case ARM::STMIA:
+ case ARM::STMIA_UPD:
+ case ARM::t2LDMIA_RET:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2STMIA:
+ case ARM::t2STMIA_UPD:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VLDMDIA:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDIA_UPD:
+ return ARM_AM::ia;
+
+ case ARM::LDMDA:
+ case ARM::LDMDA_UPD:
+ case ARM::STMDA:
+ case ARM::STMDA_UPD:
+ return ARM_AM::da;
+
+ case ARM::LDMDB:
+ case ARM::LDMDB_UPD:
+ case ARM::STMDB:
+ case ARM::STMDB_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMDB:
+ case ARM::t2STMDB_UPD:
+ case ARM::VLDMSDB:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VSTMSDB:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VLDMDDB:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VSTMDDB:
+ case ARM::VSTMDDB_UPD:
+ return ARM_AM::db;
+
+ case ARM::LDMIB:
+ case ARM::LDMIB_UPD:
+ case ARM::STMIB:
+ case ARM::STMIB_UPD:
+ return ARM_AM::ib;
+ }
+
+ return ARM_AM::bad_am_submode;
+}
+
+ } // end namespace ARM_AM
+} // end namespace llvm
+
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDR || isT2i32Load(Opc);
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}
static bool isT2i32Store(unsigned Opc) {
@@ -174,7 +282,7 @@ static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
- return Opc == ARM::STR || isT2i32Store(Opc);
+ return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as base register and
@@ -245,10 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
- Opcode = getLoadStoreMultipleOpcode(Opcode);
+ Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
+ .addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -271,22 +379,14 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// First calculate which of the registers should be killed by the merged
// instruction.
const unsigned insertPos = memOps[insertAfter].Position;
-
- SmallSet<unsigned, 4> UnavailRegs;
SmallSet<unsigned, 4> KilledRegs;
DenseMap<unsigned, unsigned> Killer;
- for (unsigned i = 0; i < memOpsBegin; ++i) {
- if (memOps[i].Position < insertPos && memOps[i].isKill) {
- unsigned Reg = memOps[i].Reg;
- if (memOps[i].Merged)
- UnavailRegs.insert(Reg);
- else {
- KilledRegs.insert(Reg);
- Killer[Reg] = i;
- }
+ for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
+ if (i == memOpsBegin) {
+ i = memOpsEnd;
+ if (i == e)
+ break;
}
- }
- for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
if (memOps[i].Position < insertPos && memOps[i].isKill) {
unsigned Reg = memOps[i].Reg;
KilledRegs.insert(Reg);
@@ -297,12 +397,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
SmallVector<std::pair<unsigned, bool>, 8> Regs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
- if (UnavailRegs.count(Reg))
- // Register is killed before and it's not easy / possible to update the
- // kill marker on already merged instructions. Abort.
- return;
-
- // If we are inserting the merged operation after an unmerged operation that
+ // If we are inserting the merged operation after an operation that
// uses the same register, make sure to transfer any kill flag.
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
@@ -318,17 +413,24 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- // Remove kill flags from any unmerged memops that come before insertPos.
+ // Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
unsigned Reg = Regs[i-memOpsBegin].first;
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
- memOps[j].MBBI->getOperand(0).setIsKill(false);
+ int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
+ assert(Idx >= 0 && "Cannot find killing operand");
+ memOps[j].MBBI->getOperand(Idx).setIsKill(false);
memOps[j].isKill = false;
}
+ memOps[i].isKill = true;
}
MBB.erase(memOps[i].MBBI);
+ // Update this memop to refer to the merged instruction.
+ // We may need to move kill flags again.
memOps[i].Merged = true;
+ memOps[i].MBBI = Merges.back();
+ memOps[i].Position = insertPos;
}
}
@@ -349,7 +451,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : ARMRegisterInfo::getRegisterNumbering(PReg);
+ : getARMRegisterNumbering(PReg);
unsigned Count = 1;
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
@@ -357,7 +459,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
- : ARMRegisterInfo::getRegisterNumbering(Reg);
+ : getARMRegisterNumbering(Reg);
// Register numbers must be in ascending order. For VFP, the registers
// must also be consecutive and there is a limit of 16 double-word
// registers per instruction.
@@ -440,8 +542,8 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return 0;
- case ARM::LDR:
- case ARM::STR:
+ case ARM::LDRi12:
+ case ARM::STRi12:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -452,31 +554,109 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::VLDRD:
case ARM::VSTRD:
return 8;
- case ARM::LDM:
- case ARM::STM:
- case ARM::t2LDM:
- case ARM::t2STM:
- case ARM::VLDMS:
- case ARM::VSTMS:
- return (MI->getNumOperands() - 4) * 4;
- case ARM::VLDMD:
- case ARM::VSTMD:
- return (MI->getNumOperands() - 4) * 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
}
}
-static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
+ ARM_AM::AMSubMode Mode) {
switch (Opc) {
- case ARM::LDM: return ARM::LDM_UPD;
- case ARM::STM: return ARM::STM_UPD;
- case ARM::t2LDM: return ARM::t2LDM_UPD;
- case ARM::t2STM: return ARM::t2STM_UPD;
- case ARM::VLDMS: return ARM::VLDMS_UPD;
- case ARM::VLDMD: return ARM::VLDMD_UPD;
- case ARM::VSTMS: return ARM::VSTMS_UPD;
- case ARM::VSTMD: return ARM::VSTMD_UPD;
default: llvm_unreachable("Unhandled opcode!");
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::LDMIA_UPD;
+ case ARM_AM::ib: return ARM::LDMIB_UPD;
+ case ARM_AM::da: return ARM::LDMDA_UPD;
+ case ARM_AM::db: return ARM::LDMDB_UPD;
+ }
+ break;
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::STMIA_UPD;
+ case ARM_AM::ib: return ARM::STMIB_UPD;
+ case ARM_AM::da: return ARM::STMDA_UPD;
+ case ARM_AM::db: return ARM::STMDB_UPD;
+ }
+ break;
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2LDMIA_UPD;
+ case ARM_AM::db: return ARM::t2LDMDB_UPD;
+ }
+ break;
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::t2STMIA_UPD;
+ case ARM_AM::db: return ARM::t2STMDB_UPD;
+ }
+ break;
+ case ARM::VLDMSIA:
+ case ARM::VLDMSDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMSIA_UPD;
+ case ARM_AM::db: return ARM::VLDMSDB_UPD;
+ }
+ break;
+ case ARM::VLDMDIA:
+ case ARM::VLDMDDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VLDMDIA_UPD;
+ case ARM_AM::db: return ARM::VLDMDDB_UPD;
+ }
+ break;
+ case ARM::VSTMSIA:
+ case ARM::VSTMSDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMSIA_UPD;
+ case ARM_AM::db: return ARM::VSTMSDB_UPD;
+ }
+ break;
+ case ARM::VSTMDIA:
+ case ARM::VSTMDDB:
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::VSTMDIA_UPD;
+ case ARM_AM::db: return ARM::VSTMDDB_UPD;
+ }
+ break;
}
+
return 0;
}
@@ -505,16 +685,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool DoMerge = false;
- ARM_AM::AMSubMode Mode = ARM_AM::ia;
-
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
if (MI->getOperand(i).getReg() == Base)
return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+
+ bool DoMerge = false;
+ ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -560,15 +738,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
+ unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode))
.addImm(Pred).addReg(PredReg);
+
// Transfer the rest of operands.
- for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+ for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
+
// Transfer memoperands.
(*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -576,14 +755,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
return true;
}
-static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
switch (Opc) {
- case ARM::LDR: return ARM::LDR_PRE;
- case ARM::STR: return ARM::STR_PRE;
- case ARM::VLDRS: return ARM::VLDMS_UPD;
- case ARM::VLDRD: return ARM::VLDMD_UPD;
- case ARM::VSTRS: return ARM::VSTMS_UPD;
- case ARM::VSTRD: return ARM::VSTMD_UPD;
+ case ARM::LDRi12:
+ return ARM::LDR_PRE;
+ case ARM::STRi12:
+ return ARM::STR_PRE;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
@@ -595,14 +781,21 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
return 0;
}
-static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
+ ARM_AM::AddrOpc Mode) {
switch (Opc) {
- case ARM::LDR: return ARM::LDR_POST;
- case ARM::STR: return ARM::STR_POST;
- case ARM::VLDRS: return ARM::VLDMS_UPD;
- case ARM::VLDRD: return ARM::VLDMD_UPD;
- case ARM::VSTRS: return ARM::VSTMS_UPD;
- case ARM::VSTRD: return ARM::VSTMD_UPD;
+ case ARM::LDRi12:
+ return ARM::LDR_POST;
+ case ARM::STRi12:
+ return ARM::STR_POST;
+ case ARM::VLDRS:
+ return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
+ case ARM::VLDRD:
+ return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
+ case ARM::VSTRS:
+ return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
+ case ARM::VSTRD:
+ return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
@@ -629,14 +822,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
DebugLoc dl = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
- bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
- if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
- return false;
- if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
- return false;
- if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
+ if (isi32Load(Opcode) || isi32Store(Opcode))
if (MI->getOperand(2).getImm() != 0)
return false;
+ if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+ return false;
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
@@ -666,7 +857,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
DoMerge = true;
}
if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
MBB.erase(PrevMBBI);
}
}
@@ -685,7 +876,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
DoMerge = true;
}
if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
if (NextMBBI == I) {
Advance = true;
++I;
@@ -698,12 +889,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
unsigned Offset = 0;
- if (isAM5)
- Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
- ARM_AM::db : ARM_AM::ia);
- else if (isAM2)
+ if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- else
+ else if (!isAM5)
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isAM5) {
@@ -715,7 +903,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
- .addImm(Offset)
.addImm(Pred).addReg(PredReg)
.addReg(MO.getReg(), (isLd ? getDefRegState(true) :
getKillRegState(MO.isKill())));
@@ -782,15 +969,14 @@ static bool isMemoryOp(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
- case ARM::LDR:
- case ARM::STR:
- return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
case ARM::VLDRS:
case ARM::VSTRS:
return MI->getOperand(1).isReg();
case ARM::VLDRD:
case ARM::VSTRD:
return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
+ case ARM::STRi12:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -818,24 +1004,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
static int getMemoryOpOffset(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
- bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField;
- int Offset = isAM2
- ? ARM_AM::getAM2Offset(OffField)
- : (isAM3 ? ARM_AM::getAM3Offset(OffField)
- : ARM_AM::getAM5Offset(OffField) * 4);
- if (isAM2) {
- if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else if (isAM3) {
+ int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM3) {
if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
Offset = -Offset;
} else {
@@ -847,35 +1028,24 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- int OffImm, bool isDef,
+ int Offset, bool isDef,
DebugLoc dl, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
- unsigned OffReg, bool OffKill, bool OffUndef,
+ bool OffKill, bool OffUndef,
ARMCC::CondCodes Pred, unsigned PredReg,
const TargetInstrInfo *TII, bool isT2) {
- int Offset = OffImm;
- if (!isT2) {
- if (OffImm < 0)
- Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
- else
- Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
- }
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
- if (!isT2)
- MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
- if (!isT2)
- MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
}
}
@@ -906,23 +1076,21 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill();
bool BaseUndef = BaseOp.isUndef();
- unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
- if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
+ if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
// ldm or stm.
unsigned NewOpc = (isLd)
- ? (isT2 ? ARM::t2LDM : ARM::LDM)
- : (isT2 ? ARM::t2STM : ARM::STM);
+ ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
+ : (isT2 ? ARM::t2STMIA : ARM::STMIA);
if (isLd) {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
.addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
@@ -930,7 +1098,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
} else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
.addReg(BaseReg, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addImm(Pred).addReg(PredReg)
.addReg(EvenReg,
getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
@@ -941,28 +1108,24 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
NewBBI = llvm::prior(MBBI);
} else {
// Split into two instructions.
- assert((!isT2 || !OffReg) &&
- "Thumb2 ldrd / strd does not encode offset register!");
unsigned NewOpc = (isLd)
- ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
- : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and the base register is killed, it may have been
// re-defined by the load; make sure the first load does not clobber it.
if (isLd &&
(BaseKill || OffKill) &&
- (TRI->regsOverlap(EvenReg, BaseReg) ||
- (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
- assert(!TRI->regsOverlap(OddReg, BaseReg) &&
- (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
+ (TRI->regsOverlap(EvenReg, BaseReg))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg));
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, false,
- BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
- BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
@@ -974,12 +1137,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
}
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
- BaseReg, false, BaseUndef, OffReg, false, OffUndef,
+ BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, OddUndef,
- BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
+ BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
}
if (isLd)
@@ -1158,17 +1321,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
return NumMerges > 0;
}
-namespace {
- struct OffsetCompare {
- bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
- int LOffset = getMemoryOpOffset(LHS);
- int ROffset = getMemoryOpOffset(RHS);
- assert(LHS == RHS || LOffset != ROffset);
- return LOffset > ROffset;
- }
- };
-}
-
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
@@ -1182,20 +1334,25 @@ namespace {
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
if (MBBI != MBB.begin() &&
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
MachineInstr *PrevMI = prior(MBBI);
- if (PrevMI->getOpcode() == ARM::LDM_UPD ||
- PrevMI->getOpcode() == ARM::t2LDM_UPD) {
+ unsigned Opcode = PrevMI->getOpcode();
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
+ Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
if (MO.getReg() != ARM::LR)
return false;
- unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
+ unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
+ assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
+ Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
MBB.erase(MBBI);
return true;
}
@@ -1216,7 +1373,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
++MFI) {
MachineBasicBlock &MBB = *MFI;
Modified |= LoadStoreMultipleOpti(MBB);
- Modified |= MergeReturnIntoLDM(MBB);
+ if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
+ Modified |= MergeReturnIntoLDM(MBB);
}
delete RS;
@@ -1250,7 +1408,7 @@ namespace {
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg,
- unsigned &OffReg, int &Offset,
+ int &Offset,
unsigned &PredReg, ARMCC::CondCodes &Pred,
bool &isT2);
bool RescheduleOps(MachineBasicBlock *MBB,
@@ -1292,7 +1450,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
if (I->isDebugValue() || MemOps.count(&*I))
continue;
const TargetInstrDesc &TID = I->getDesc();
- if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
return false;
if (isLd && TID.mayStore())
return false;
@@ -1330,8 +1488,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
unsigned &OddReg, unsigned &BaseReg,
- unsigned &OffReg, int &Offset,
- unsigned &PredReg,
+ int &Offset, unsigned &PredReg,
ARMCC::CondCodes &Pred,
bool &isT2) {
// Make sure we're allowed to generate LDRD/STRD.
@@ -1341,9 +1498,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDR)
+ if (Opcode == ARM::LDRi12)
NewOpc = ARM::LDRD;
- else if (Opcode == ARM::STR)
+ else if (Opcode == ARM::STRi12)
NewOpc = ARM::STRD;
else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
NewOpc = ARM::t2LDRDi8;
@@ -1356,12 +1513,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
} else
return false;
- // Make sure the offset registers match.
- if (!isT2 &&
- (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
- return false;
-
- // Must sure the base address satisfies i64 ld / st alignment requirement.
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
if (!Op0->hasOneMemOperand() ||
!(*Op0->memoperands_begin())->getValue() ||
(*Op0->memoperands_begin())->isVolatile())
@@ -1370,7 +1522,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
const Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
- ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
: 8; // Pre-v6 need 8-byte align
if (Align < ReqAlign)
return false;
@@ -1404,13 +1556,22 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- if (!isT2)
- OffReg = Op0->getOperand(2).getReg();
Pred = llvm::getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
@@ -1493,14 +1654,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
MachineInstr *Op0 = Ops.back();
MachineInstr *Op1 = Ops[Ops.size()-2];
unsigned EvenReg = 0, OddReg = 0;
- unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
+ unsigned BaseReg = 0, PredReg = 0;
ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false;
unsigned NewOpc = 0;
int Offset = 0;
DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
- EvenReg, OddReg, BaseReg, OffReg,
+ EvenReg, OddReg, BaseReg,
Offset, PredReg, Pred, isT2)) {
Ops.pop_back();
Ops.pop_back();
@@ -1512,8 +1673,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
.addReg(EvenReg, RegState::Define)
.addReg(OddReg, RegState::Define)
.addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming LDRi12s.
if (!isT2)
- MIB.addReg(OffReg);
+ MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumLDRDFormed;
} else {
@@ -1522,8 +1686,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
.addReg(EvenReg)
.addReg(OddReg)
.addReg(BaseReg);
+ // FIXME: We're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+ // always be reg0 since we're transforming STRi12s.
if (!isT2)
- MIB.addReg(OffReg);
+ MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumSTRDFormed;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
new file mode 100644
index 0000000..6d7b485
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -0,0 +1,1230 @@
+//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARMMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "ARMInstrInfo.h"
+#include "ARMMCExpr.h"
+#include "ARMSubtarget.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
+
+namespace {
+class ARMMCCodeEmitter : public MCCodeEmitter {
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const ARMSubtarget *Subtarget;
+ MCContext &Ctx;
+
+public:
+ ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+ : TM(tm), TII(*TM.getInstrInfo()),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) {
+ }
+
+ ~ARMMCCodeEmitter() {}
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
+ /// the specified operand. This is used for operands with :lower16: and
+ /// :upper16: prefixes.
+ uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
+ unsigned &Reg, unsigned &Imm,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate
+ /// BL branch target.
+ uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+ /// BLX branch target.
+ uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+ uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+ /// immediate Thumb2 direct branch target.
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+ /// branch target.
+ uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 12-bit immediate
+ /// ADR label target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand.
+ uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups)const;
+
+ /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+
+ /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
+ /// operand as needed by load/store instructions.
+ uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getLdStmModeOpValue - Return encoding for load/store multiple mode.
+ uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: return 0;
+ case ARM_AM::ia: return 1;
+ case ARM_AM::db: return 2;
+ case ARM_AM::ib: return 3;
+ }
+ }
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
+ switch (ShOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::no_shift:
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::asr: return 2;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+ }
+
+ /// getAddrMode2OpValue - Return encoding for addrmode2 operands.
+ uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
+ uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands.
+ uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode3OpValue - Return encoding for addrmode3 operands.
+ uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12'
+ /// operand.
+ uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+ uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand.
+ uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getCCOutOpValue - Return encoding of the 's' bit.
+ unsigned getCCOutOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or
+ // '1' respectively.
+ return MI.getOperand(Op).getReg() == ARM::CPSR;
+ }
+
+ /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+ assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+ // Encode rotate_imm.
+ unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+ return Binary;
+ }
+
+ /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
+ unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned SoImm = MI.getOperand(Op).getImm();
+ unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm);
+ assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
+ return Encoded;
+ }
+
+ unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getSORegOpValue - Return an encoded so_reg shifted register value.
+ unsigned getSORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRotImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ switch (MI.getOperand(Op).getImm()) {
+ default: assert (0 && "Not a valid rot_imm value!");
+ case 0: return 0;
+ case 8: return 1;
+ case 16: return 2;
+ case 24: return 3;
+ }
+ }
+
+ unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 1;
+ }
+
+ unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+ }
+
+ unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+ unsigned NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ unsigned VFPThumb2PostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const;
+
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+ // Output the constant in little endian byte order.
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(TM, Ctx);
+}
+
+/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ // NEON Thumb2 data-processing encodings are very simple: bit 24 is moved
+ // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
+ // set to 1111.
+ unsigned Bit24 = EncodedValue & 0x01000000;
+ unsigned Bit28 = Bit24 << 4;
+ EncodedValue &= 0xEFFFFFFF;
+ EncodedValue |= Bit28;
+ EncodedValue |= 0x0F000000;
+ }
+
+ return EncodedValue;
+}
+
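(Illustrative sketch, not part of the patch: a self-contained rendering of the bit rewrite NEONThumb2DataIPostEncoder performs above, with a worked value so the bit 24 -> bit 28 move is easy to check. The helper name and sample encoding are invented for the example.)

#include <cstdint>
#include <cstdio>

// Mirror the Thumb2 rewrite above: bit 24 moves to bit 28 and bits 27-24
// are forced to 1111.
static uint32_t neonDataToThumb2(uint32_t Enc) {
  uint32_t Bit24 = Enc & 0x01000000;
  uint32_t Bit28 = Bit24 << 4;
  Enc &= 0xEFFFFFFF;   // clear the old bit 28
  Enc |= Bit28;        // drop the relocated bit in
  Enc |= 0x0F000000;   // bits 27-24 = 1111
  return Enc;
}

int main() {
  // Only bit 24 set on input: the output has bit 28 set and 0xf in 27-24,
  // i.e. 0x01000000 -> 0x1f000000.
  printf("0x%08x\n", neonDataToThumb2(0x01000000u));
  return 0;
}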
+/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0xF0FFFFFF;
+ EncodedValue |= 0x09000000;
+ }
+
+ return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+ unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0x00FFFFFF;
+ EncodedValue |= 0xEE000000;
+ }
+
+ return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+ if (Subtarget->isThumb2()) {
+ EncodedValue &= 0x0FFFFFFF;
+ EncodedValue |= 0xE0000000;
+ }
+ return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+
+ // Q registers are encoded as 2x their register number.
+ switch (Reg) {
+ default:
+ return RegNo;
+ case ARM::Q0: case ARM::Q1: case ARM::Q2: case ARM::Q3:
+ case ARM::Q4: case ARM::Q5: case ARM::Q6: case ARM::Q7:
+ case ARM::Q8: case ARM::Q9: case ARM::Q10: case ARM::Q11:
+ case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+ return 2 * RegNo;
+ }
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ llvm_unreachable("Unable to encode MCOperand!");
+ return 0;
+}
+
+/// EncodeAddrModeOpValues - Return encoding info for 'reg +/- imm' operand.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+ unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+ Reg = getARMRegisterNumbering(MO.getReg());
+
+ int32_t SImm = MO1.getImm();
+ bool isAdd = true;
+
+ // Special value for #-0
+ if (SImm == INT32_MIN)
+ SImm = 0;
+
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (SImm < 0) {
+ SImm = -SImm;
+ isAdd = false;
+ }
+
+ Imm = SImm;
+ return isAdd;
+}
+
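(Illustrative sketch, not part of the patch: the sign handling in EncodeAddrModeOpValues boils down to "store the magnitude, return the add/sub flag"; the helper name and sample offsets below are invented for the example.)

#include <cstdint>
#include <climits>
#include <cstdio>

// Split a signed offset into a positive magnitude plus an add/sub flag,
// treating INT32_MIN as the special "#-0" marker, as the encoder above does.
static bool splitOffset(int32_t SImm, unsigned &Imm) {
  bool isAdd = true;
  if (SImm == INT32_MIN)      // "#-0"
    SImm = 0;
  if (SImm < 0) {
    SImm = -SImm;
    isAdd = false;
  }
  Imm = static_cast<unsigned>(SImm);
  return isAdd;
}

int main() {
  unsigned Imm;
  bool Add = splitOffset(-8, Imm);
  printf("imm=%u add=%d\n", Imm, Add);    // imm=8 add=0
  Add = splitOffset(12, Imm);
  printf("imm=%u add=%d\n", Imm, Add);    // imm=12 add=1
  return 0;
}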
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ unsigned FixupKind,
+ SmallVectorImpl<MCFixup> &Fixups) {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm()) return MO.getImm();
+ assert(MO.isExpr() && "Unexpected branch target type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch has a non-always predication
+static bool HasConditionalBranch(const MCInst &MI) {
+ int NumOp = MI.getNumOperands();
+ if (NumOp >= 2) {
+ for (int i = 0; i < NumOp-1; ++i) {
+ const MCOperand &MCOp1 = MI.getOperand(i);
+ const MCOperand &MCOp2 = MI.getOperand(i + 1);
+ if (MCOp1.isImm() && MCOp2.isReg() &&
+ (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+ if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // FIXME: This really, really shouldn't use TargetMachine. We don't want
+ // coupling between MC and TM anywhere we can help it.
+ if (Subtarget->isThumb2())
+ return
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+ return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (HasConditionalBranch(MI))
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Val =
+ ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ bool I = (Val & 0x800000);
+ bool J1 = (Val & 0x400000);
+ bool J2 = (Val & 0x200000);
+ if (I ^ J1)
+ Val &= ~0x400000;
+ else
+ Val |= 0x400000;
+
+ if (I ^ J2)
+ Val &= ~0x200000;
+ else
+ Val |= 0x200000;
+
+ return Val;
+}
+
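(Illustrative sketch, not part of the patch: the J1/J2 adjustment above sets each J bit to the inverse of (I XOR J). A standalone version with two sample values:)

#include <cstdint>
#include <cstdio>

// Reproduce the fix-up of bit 22 (J1) and bit 21 (J2) against bit 23 (I)
// done by getUnconditionalBranchTargetOpValue above.
static uint32_t fixJBits(uint32_t Val) {
  bool I  = (Val & 0x800000) != 0;
  bool J1 = (Val & 0x400000) != 0;
  bool J2 = (Val & 0x200000) != 0;
  Val = (I ^ J1) ? (Val & ~0x400000u) : (Val | 0x400000u);
  Val = (I ^ J2) ? (Val & ~0x200000u) : (Val | 0x200000u);
  return Val;
}

int main() {
  // I=0, J1=0, J2=0: both J bits get set    -> 0x00600000
  printf("0x%08x\n", fixJBits(0x00000000u));
  // I=1, J1=0, J2=0: both J bits stay clear -> 0x00800000
  printf("0x%08x\n", fixJBits(0x00800000u));
  return 0;
}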
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+ Fixups);
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+ Fixups);
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+ Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &) const {
+ // [Rn, Rm]
+ // {5-3} = Rm
+ // {2-0} = Rn
+ const MCOperand &MO1 = MI.getOperand(OpIdx);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+ return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-13} = reg
+ // {12} = (U)nsigned (add == '1', sub == '0')
+ // {11-0} = imm12
+ unsigned Reg, Imm12;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm12 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+
+ MCFixupKind Kind;
+ if (Subtarget->isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+ uint32_t Binary = Imm12 & 0xfff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 12);
+ Binary |= (Reg << 13);
+ return Binary;
+}
+
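(Illustrative sketch, not part of the patch: a worked example of the field layout documented above -- imm12 in bits 11-0, the 'U' bit in bit 12, the base register from bit 13 up. It covers only the plain register form; the "#-0" and fixup paths are ignored, and the register number and offsets are sample values.)

#include <cstdint>
#include <cstdio>

// Pack reg/U/imm12 the way getAddrModeImm12OpValue does for the register form.
static uint32_t packImm12(unsigned Reg, int Offset) {
  bool isAdd = Offset >= 0;
  uint32_t Binary = static_cast<unsigned>(isAdd ? Offset : -Offset) & 0xfff;
  if (isAdd)
    Binary |= (1 << 12);
  Binary |= (Reg << 13);
  return Binary;
}

int main() {
  printf("0x%05x\n", packImm12(3, 8));    // r3 + 8 -> (3<<13)|(1<<12)|8 = 0x07008
  printf("0x%05x\n", packImm12(3, -8));   // r3 - 8 -> (3<<13)|8        = 0x06008
  return 0;
}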
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd = true;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is set as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else
+ isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+ uint32_t Binary = (Imm8 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression.
+// In reality, it's only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct),
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ default: assert(0 && "Unexpected expression type");
+ case MCExpr::SymbolRef: return false;
+ case MCExpr::Binary: return true;
+ }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {20-16} = imm{15-12}
+ // {11-0} = imm{11-0}
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (MO.isImm())
+ // Hi / lo 16 bits already extracted during earlier passes.
+ return static_cast<unsigned>(MO.getImm());
+
+ // Handle :upper16: and :lower16: assembly prefixes.
+ const MCExpr *E = MO.getExpr();
+ if (E->getKind() == MCExpr::Target) {
+ const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+ E = ARM16Expr->getSubExpr();
+
+ MCFixupKind Kind;
+ switch (ARM16Expr->getKind()) {
+ default: assert(0 && "Unsupported ARMFixup");
+ case ARMMCExpr::VK_ARM_HI16:
+ if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movt_hi16_pcrel
+ : ARM::fixup_arm_movt_hi16_pcrel);
+ else
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movt_hi16
+ : ARM::fixup_arm_movt_hi16);
+ break;
+ case ARMMCExpr::VK_ARM_LO16:
+ if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movw_lo16_pcrel
+ : ARM::fixup_arm_movw_lo16_pcrel);
+ else
+ Kind = MCFixupKind(Subtarget->isThumb2()
+ ? ARM::fixup_t2_movw_lo16
+ : ARM::fixup_arm_movw_lo16);
+ break;
+ }
+ Fixups.push_back(MCFixup::Create(0, E, Kind));
+ return 0;
+ }
+
+ llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+ return 0;
+}
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+ unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+ bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+ unsigned SBits = getShiftOp(ShOp);
+
+ // {16-13} = Rn
+ // {12} = isAdd
+ // {11-0} = shifter
+ // {3-0} = Rm
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+ uint32_t Binary = Rm;
+ Binary |= Rn << 13;
+ Binary |= SBits << 5;
+ Binary |= ShImm << 7;
+ if (isAdd)
+ Binary |= 1 << 12;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {17-14} Rn
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+ Binary |= Rn << 14;
+ return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm12, 0 == Rm
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+ bool isReg = MO.getReg() != 0;
+ uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+ if (isReg) {
+ ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+ Binary <<= 7; // Shift amount is bits [11:7]
+ Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+ Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0]
+ }
+ return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {9} 1 == imm8, 0 == Rm
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ unsigned Imm = MO1.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO.getReg());
+ return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm = MO2.getImm();
+ bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+ bool isImm = MO1.getReg() == 0;
+ uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+ // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+ if (!isImm)
+ Imm8 = getARMRegisterNumbering(MO1.getReg());
+ return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [SP, #imm]
+ // {7-0} = imm8
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+ "Unexpected base register!");
+
+ // The immediate is already shifted for the implicit zeroes, so no change
+ // here.
+ return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // [Rn, #imm]
+ // {7-3} = imm5
+ // {2-0} = Rn
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ unsigned Rn = getARMRegisterNumbering(MO.getReg());
+ unsigned Imm5 = MO1.getImm();
+ return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // {12-9} = reg
+ // {8} = (U)nsigned (add == '1', sub == '0')
+ // {7-0} = imm8
+ unsigned Reg, Imm8;
+ bool isAdd;
+ // If the first operand isn't a register, we have a label reference.
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+ Imm8 = 0;
+ isAdd = false; // 'U' bit is handled as part of the fixup.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind;
+ if (Subtarget->isThumb2())
+ Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ else
+ Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+ ++MCNumCPRelocations;
+ } else {
+ EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+ isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+ }
+
+ uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+ // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 8);
+ Binary |= (Reg << 9);
+ return Binary;
+}
+
+unsigned ARMMCCodeEmitter::
+getSORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is either Rs, the amount to shift by, or reg0 in which
+ // case the imm contains the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 1 if reg shift, 0 if imm shift
+ // {6-5} = type
+ // If reg shift:
+ // {11-8} = Rs
+ // {7} = 0
+ // else (imm shift)
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MCOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
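(Illustrative sketch, not part of the patch: the two shifter forms encoded by getSORegOpValue, assuming Rs lands in bits 11-8 as the comment in the code states; registers, shift types and amounts are sample values.)

#include <cstdint>
#include <cstdio>

// Immediate-shift form: Rm in bits 3-0, shift bits in 6-4 (bit 4 clear),
// shift amount in bits 11-7.
static uint32_t soRegImm(unsigned Rm, unsigned ShiftBits, unsigned Imm) {
  return Rm | (ShiftBits << 4) | (Imm << 7);
}

// Register-shift form: bit 4 set via the shift bits, Rs in bits 11-8.
static uint32_t soRegReg(unsigned Rm, unsigned ShiftBits, unsigned Rs) {
  return Rm | (ShiftBits << 4) | (Rs << 8);
}

int main() {
  printf("0x%03x\n", soRegImm(1, 0x0, 2));   // r1, LSL #2 -> 0x101
  printf("0x%03x\n", soRegReg(1, 0x3, 4));   // r1, LSR r4 -> 0x431
  return 0;
}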
+unsigned ARMMCCodeEmitter::
+getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+ const MCOperand &MO3 = MI.getOperand(OpNum+2);
+
+ // Encoded as [Rn, Rm, imm].
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+ Value <<= 4;
+ Value |= getARMRegisterNumbering(MO2.getReg());
+ Value <<= 2;
+ Value |= MO3.getImm();
+
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+ const MCOperand &MO2 = MI.getOperand(OpNum+1);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = getARMRegisterNumbering(MO1.getReg());
+
+ // Even though the immediate is 8 bits long, we need 9 bits in order
+ // to represent the (inverse of the) sign bit.
+ Value <<= 9;
+ int32_t tmp = (int32_t)MO2.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
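(Illustrative sketch, not part of the patch: the 9-bit field built by getT2AddrModeImm8OpValue holds an ADD bit plus the magnitude of an 8-bit offset, with the base register packed above it; the register and offsets are sample values.)

#include <cstdint>
#include <cstdlib>
#include <cstdio>

// Same packing as above: Rn over a 9-bit field of ADD bit (256) + |imm8|.
static unsigned packT2Imm8(unsigned Rn, int32_t Off) {
  unsigned Value = Rn << 9;
  if (Off < 0)
    Off = std::abs(Off);
  else
    Value |= 256;          // set the ADD bit
  Value |= Off & 255;
  return Value;
}

int main() {
  printf("0x%03x\n", packT2Imm8(5, 4));    // r5 + 4 -> 0xb04
  printf("0x%03x\n", packT2Imm8(5, -4));   // r5 - 4 -> 0xa04
  return 0;
}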
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 256; // Set the ADD bit
+ Value |= tmp & 255;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO1 = MI.getOperand(OpNum);
+
+ // FIXME: Needs fixup support.
+ unsigned Value = 0;
+ int32_t tmp = (int32_t)MO1.getImm();
+ if (tmp < 0)
+ tmp = abs(tmp);
+ else
+ Value |= 4096; // Set the ADD bit
+ Value |= tmp & 4095;
+ return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+ // shifted. The second is the amount to shift by.
+ //
+ // {3-0} = Rm.
+ // {4} = 0
+ // {6-5} = type
+ // {11-7} = imm
+
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+ // Encode Rm.
+ unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: llvm_unreachable("Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // 10 bits. Lower 5 bits are the lsb of the mask, high five bits are the
+ // msb of the mask.
+ const MCOperand &MO = MI.getOperand(Op);
+ uint32_t v = ~MO.getImm();
+ uint32_t lsb = CountTrailingZeros_32(v);
+ uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1;
+ assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // MSB - 5 bits.
+ uint32_t lsb = MI.getOperand(Op-1).getImm();
+ uint32_t width = MI.getOperand(Op).getImm();
+ uint32_t msb = lsb+width-1;
+ assert (width != 0 && msb < 32 && "Illegal bit width!");
+ return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // VLDM/VSTM:
+ // {12-8} = Vd
+ // {7-0} = Number of registers
+ //
+ // LDM/STM:
+ // {15-0} = Bitfield of GPRs.
+ unsigned Reg = MI.getOperand(Op).getReg();
+ bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+ bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+ unsigned Binary = 0;
+
+ if (SPRRegs || DPRRegs) {
+ // VLDM/VSTM
+ unsigned RegNo = getARMRegisterNumbering(Reg);
+ unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+ Binary |= (RegNo & 0x1f) << 8;
+ if (SPRRegs)
+ Binary |= NumRegs;
+ else
+ Binary |= NumRegs * 2;
+ } else {
+ for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+ unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+ Binary |= 1 << RegNo;
+ }
+ }
+
+ return Binary;
+}
+
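(Illustrative sketch, not part of the patch: the LDM/STM branch above builds a plain 16-bit mask with one bit per GPR number; the register list below is a sample, taking lr as register number 14.)

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned RegNums[] = {0, 2, 14};   // r0, r2, lr
  uint32_t Binary = 0;
  for (unsigned RegNo : RegNums)           // one bit per register number
    Binary |= 1u << RegNo;
  printf("0x%04x\n", Binary);              // 0x4005
  return 0;
}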
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
+unsigned ARMMCCodeEmitter::
+getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x02; break;
+ case 32: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and
+/// alignment operand for use in VLD-dup instructions. This is the same as
+/// getAddrMode6AddressOpValue except for the alignment encoding, which is
+/// different for VLD4-dup.
+unsigned ARMMCCodeEmitter::
+getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &Reg = MI.getOperand(Op);
+ const MCOperand &Imm = MI.getOperand(Op + 1);
+
+ unsigned RegNo = getARMRegisterNumbering(Reg.getReg());
+ unsigned Align = 0;
+
+ switch (Imm.getImm()) {
+ default: break;
+ case 2:
+ case 4:
+ case 8: Align = 0x01; break;
+ case 16: Align = 0x03; break;
+ }
+
+ return RegNo | (Align << 4);
+}
+
+unsigned ARMMCCodeEmitter::
+getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(Op);
+ if (MO.getReg() == 0) return 0x0D;
+ return MO.getReg();
+}
+
+void ARMMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Pseudo instructions don't get encoded.
+ const TargetInstrDesc &Desc = TII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
+ return;
+ int Size;
+ // Basic size info comes from the TSFlags field.
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: llvm_unreachable("Unexpected instruction size!");
+ case ARMII::Size2Bytes: Size = 2; break;
+ case ARMII::Size4Bytes: Size = 4; break;
+ }
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+ // Thumb 32-bit wide instructions need to emit the high order halfword
+ // first.
+ if (Subtarget->isThumb() && Size == 4) {
+ EmitConstant(Binary >> 16, 2, OS);
+ EmitConstant(Binary & 0xffff, 2, OS);
+ } else
+ EmitConstant(Binary, Size, OS);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
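(Illustrative sketch, not part of the patch: for 4-byte Thumb instructions EncodeInstruction emits the high halfword first and each halfword little-endian, so a sample word 0xAABBCCDD leaves as the byte stream bb aa dd cc.)

#include <cstdint>
#include <cstdio>

// Emit `Size` bytes of `Val` in little-endian order, like EmitConstant above.
static void emitLE(uint32_t Val, unsigned Size) {
  for (unsigned i = 0; i != Size; ++i) {
    printf("%02x ", Val & 0xff);
    Val >>= 8;
  }
}

int main() {
  uint32_t Binary = 0xAABBCCDDu;   // sample 32-bit Thumb2 encoding
  emitLE(Binary >> 16, 2);         // high halfword first
  emitLE(Binary & 0xffff, 2);      // then the low halfword
  printf("\n");                    // prints: bb aa dd cc
  return 0;
}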
+#include "ARMGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
new file mode 100644
index 0000000..2727ba8
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.cpp
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "armmcexpr"
+#include "ARMMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+const ARMMCExpr*
+ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) ARMMCExpr(Kind, Expr);
+}
+
+void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_ARM_HI16: OS << ":upper16:"; break;
+ case VK_ARM_LO16: OS << ":lower16:"; break;
+ }
+
+ const MCExpr *Expr = getSubExpr();
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << '(';
+ Expr->print(OS);
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ OS << ')';
+}
+
+bool
+ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ assert(0 && "Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
new file mode 100644
index 0000000..d42f766
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMMCExpr.h
@@ -0,0 +1,73 @@
+//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCEXPR_H
+#define ARMMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class ARMMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_ARM_None,
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
+ : Kind(_Kind), Expr(_Expr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_HI16, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
+ return Create(VK_ARM_LO16, Expr, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const ARMMCExpr *) { return true; }
+
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b..59d6050 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -12,122 +12,69 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMMCInstLower.h"
-//#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "ARM.h"
+#include "ARMAsmPrinter.h"
+#include "ARMMCExpr.h"
+#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-//#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
-#if 0
-const ARMSubtarget &ARMMCInstLower::getSubtarget() const {
- return AsmPrinter.getSubtarget();
-}
-
-MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const {
- assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
- return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
-}
-#endif
-
-MCSymbol *ARMMCInstLower::
-GetGlobalAddressSymbol(const MachineOperand &MO) const {
- // FIXME: HANDLE PLT references how??
- switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
- case 0: break;
- }
-
- return Printer.Mang->getSymbol(MO.getGlobal());
-}
-
-MCSymbol *ARMMCInstLower::
-GetExternalSymbolSymbol(const MachineOperand &MO) const {
- // FIXME: HANDLE PLT references how??
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ ARMAsmPrinter &Printer) {
+ MCContext &Ctx = Printer.OutContext;
+ const MCExpr *Expr;
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
- case 0: break;
+ default: {
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on symbol operand");
+ case 0:
+ break;
+ case ARMII::MO_LO16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ Expr = ARMMCExpr::CreateLower16(Expr, Ctx);
+ break;
+ case ARMII::MO_HI16:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ Expr = ARMMCExpr::CreateUpper16(Expr, Ctx);
+ break;
+ }
+ break;
}
-
- return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
-}
-
-
-MCSymbol *ARMMCInstLower::
-GetJumpTableSymbol(const MachineOperand &MO) const {
- SmallString<256> Name;
- raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
- << Printer.getFunctionNumber() << '_' << MO.getIndex();
-
-#if 0
- switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
+ case ARMII::MO_PLT:
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+ break;
}
-#endif
-
- // Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
-}
-MCSymbol *ARMMCInstLower::
-GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- SmallString<256> Name;
- raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
- << Printer.getFunctionNumber() << '_' << MO.getIndex();
-
-#if 0
- switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
- }
-#endif
-
- // Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
-}
-
-MCOperand ARMMCInstLower::
-LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
- // FIXME: We would like an efficient form for this, so we don't have to do a
- // lot of extra uniquing.
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-
-#if 0
- switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
- }
-#endif
-
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
return MCOperand::CreateExpr(Expr);
-}
+}
-void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
-
+
MCOperand MCOp;
switch (MO.getType()) {
default:
MI->dump();
assert(0 && "unknown operand type");
case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
@@ -136,27 +83,33 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), Ctx));
+ MO.getMBB()->getSymbol(), AP.OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ MCOp = GetSymbolRef(MO,
+ AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
break;
case MachineOperand::MO_JumpTableIndex:
- MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
break;
case MachineOperand::MO_ConstantPoolIndex:
- MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
break;
case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol(
- MO.getBlockAddress()));
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
}
-
+
OutMI.addOperand(MCOp);
}
-
}
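
Editor's note: the new MO_FPImmediate case in the lowering loop above normalizes the operand's APFloat to IEEE double before wrapping it in an MCOperand. A standalone sketch of that conversion, using a hypothetical single-precision value rather than anything taken from a real MachineOperand:

  // Sketch of the APFloat -> double conversion used for MO_FPImmediate.
  APFloat Val(1.5f);                       // hypothetical source value (float)
  bool ignored;
  Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
  MCOperand Op = MCOperand::CreateFPImm(Val.convertToDouble());

Converting to double first means the MCInst always carries a single, uniform FP payload regardless of the source operand's original semantics.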
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 514c26b..138f0c2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -22,8 +22,8 @@
namespace llvm {
-/// ARMFunctionInfo - This class is derived from MachineFunction private
-/// ARM target-specific information for each MachineFunction.
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM-specific information for each MachineFunction.
class ARMFunctionInfo : public MachineFunctionInfo {
/// isThumb - True if this function is compiled under Thumb mode.
@@ -79,15 +79,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
BitVector GPRCS2Frames;
BitVector DPRCSFrames;
- /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
- ///
- BitVector SpilledCSRegs;
-
/// JumpTableUId - Unique id for jumptables.
///
unsigned JumpTableUId;
- unsigned ConstPoolEntryUId;
+ unsigned PICLabelUId;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
@@ -95,6 +91,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// HasITBlocks - True if IT blocks have been inserted.
bool HasITBlocks;
+ /// CPEClones - Track constant pool entries clones created by Constant Island
+ /// pass.
+ DenseMap<unsigned, unsigned> CPEClones;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -104,8 +104,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
- HasITBlocks(false) {}
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -115,9 +115,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
- SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
- HasITBlocks(false) {}
+ JumpTableUId(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -207,18 +206,6 @@ public:
}
}
- void setCSRegisterIsSpilled(unsigned Reg) {
- SpilledCSRegs.set(Reg);
- }
-
- bool isCSRegisterSpilled(unsigned Reg) const {
- return SpilledCSRegs[Reg];
- }
-
- const BitVector &getSpilledCSRegisters() const {
- return SpilledCSRegs;
- }
-
unsigned createJumpTableUId() {
return JumpTableUId++;
}
@@ -227,16 +214,16 @@ public:
return JumpTableUId;
}
- void initConstPoolEntryUId(unsigned UId) {
- ConstPoolEntryUId = UId;
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
}
- unsigned getNumConstPoolEntries() const {
- return ConstPoolEntryUId;
+ unsigned getNumPICLabels() const {
+ return PICLabelUId;
}
- unsigned createConstPoolEntryUId() {
- return ConstPoolEntryUId++;
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
}
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
@@ -244,6 +231,19 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+
+ void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
+ if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
+ assert(0 && "Duplicate entries!");
+ }
+
+ unsigned getOriginalCPIdx(unsigned CloneIdx) const {
+ DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx);
+ if (I != CPEClones.end())
+ return I->second;
+ else
+ return -1U;
+ }
};
} // End llvm namespace
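
Editor's note: the CPEClones map added above lets the Constant Islands pass record each cloned constant-pool entry and later recover the original index. A short sketch under the assumption that AFI was obtained via MF.getInfo<ARMFunctionInfo>() and that the index values are purely illustrative:

  // Record a clone and map it back to its original constant-pool index.
  unsigned OrigIdx  = 3;                   // hypothetical original CP index
  unsigned CloneIdx = 7;                   // hypothetical clone created by Constant Islands
  AFI->recordCPEClone(OrigIdx, CloneIdx);
  unsigned Mapped = AFI->getOriginalCPIdx(CloneIdx);  // yields 3
  unsigned Miss   = AFI->getOriginalCPIdx(42);        // yields -1U: no clone recorded

The -1U sentinel on a miss mirrors getOriginalCPIdx's else branch in the header above, so callers can distinguish cloned entries from ordinary ones.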
diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
index 5ff7c38..edecc4b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h
@@ -21,6566 +21,6566 @@
// This table is 6561*4 = 26244 bytes in size.
static const unsigned PerfectShuffleTable[6561+1] = {
- 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
- 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
- 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
- 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
- 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
- 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
- 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
- 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
- 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
- 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
- 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
- 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
- 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
- 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
- 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
- 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
- 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
- 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
- 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
- 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
- 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
- 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
- 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
- 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
- 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
- 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
- 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
- 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
- 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
- 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
- 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
- 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
- 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
- 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
- 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
- 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
- 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
- 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
- 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
- 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
- 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
- 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
- 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
- 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
- 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
- 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
- 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
- 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
- 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
- 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
- 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
- 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
- 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
- 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
- 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
- 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
- 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
- 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
- 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
- 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
- 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
- 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
- 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
- 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
- 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
- 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
- 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
- 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
- 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
- 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
- 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
- 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
- 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
- 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
- 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
- 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
- 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
- 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
- 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
- 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
- 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
- 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
- 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
- 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
- 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
- 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
- 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
- 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
- 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
- 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
- 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
- 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
- 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
- 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
- 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
- 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
- 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
- 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
- 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
- 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
- 835584U, // <0,1,2,3>: Cost 0 copy LHS
- 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
- 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
- 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
- 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
- 835584U, // <0,1,2,u>: Cost 0 copy LHS
- 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
- 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
- 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
- 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
- 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
- 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
- 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
- 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
- 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
- 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
- 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
- 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
- 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
- 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
- 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
- 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
- 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
- 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
- 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
- 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
- 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
- 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
- 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
- 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
- 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
- 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
- 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
- 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
- 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
- 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
- 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
- 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
- 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
- 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
- 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
- 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
- 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
- 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
- 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
- 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
- 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
- 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
- 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
- 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
- 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
- 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
- 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
- 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
- 835584U, // <0,1,u,3>: Cost 0 copy LHS
- 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
- 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
- 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
- 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
- 835584U, // <0,1,u,u>: Cost 0 copy LHS
- 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
- 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
- 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
- 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
- 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
- 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
- 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
- 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
- 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
- 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
- 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
- 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
- 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
- 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
- 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
- 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
- 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
- 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
- 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
- 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
- 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
- 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
- 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
- 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
- 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
- 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
- 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
- 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
- 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
- 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
- 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
- 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
- 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
- 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
- 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
- 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
- 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
- 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
- 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
- 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
- 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
- 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
- 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
- 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
- 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
- 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
- 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
- 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
- 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
- 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
- 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
- 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
- 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
- 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
- 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
- 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
- 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
- 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
- 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
- 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
- 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
- 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
- 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
- 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
- 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
- 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
- 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
- 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
- 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
- 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
- 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
- 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
- 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
- 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
- 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
- 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
- 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
- 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
- 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
- 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
- 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
- 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
- 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
- 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
- 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
- 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
- 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
- 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
- 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
- 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
- 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
- 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
- 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
- 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
- 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
- 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
- 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
- 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
- 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
- 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
- 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
- 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
- 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
- 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
- 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
- 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
- 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
- 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
- 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
- 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
- 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
- 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
- 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
- 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
- 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
- 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
- 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
- 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
- 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
- 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
- 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
- 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
- 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
- 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
- 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
- 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
- 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
- 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
- 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
- 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
- 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
- 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
- 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
- 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
- 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
- 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
- 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
- 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
- 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
- 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
- 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
- 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
- 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
- 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
- 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
- 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
- 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
- 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
- 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
- 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
- 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
- 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
- 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
- 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
- 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
- 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
- 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
- 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
- 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
- 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
- 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
- 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
- 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
- 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
- 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
- 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
- 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
- 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
- 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
- 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
- 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
- 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
- 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
- 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
- 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
- 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
- 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
- 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
- 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
- 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
- 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
- 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
- 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
- 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
- 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
- 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
- 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
- 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
- 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
- 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
- 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
- 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
- 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
- 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
- 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
- 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
- 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
- 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
- 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
- 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
- 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
- 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
- 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
- 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
- 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
- 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
- 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
- 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
- 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
- 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
- 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
- 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
- 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
- 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
- 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
- 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
- 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
- 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
- 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
- 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
- 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
- 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
- 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
- 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
- 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
- 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
- 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
- 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
- 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
- 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
- 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
- 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
- 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
- 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
- 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
- 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
- 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
- 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
- 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
- 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
- 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
- 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
- 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
- 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
- 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
- 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
- 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
- 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
- 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
- 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
- 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
- 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
- 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
- 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
- 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
- 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
- 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
- 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
- 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
- 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
- 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
- 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
- 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
- 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
- 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
- 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
- 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
- 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
- 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
- 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
- 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
- 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
- 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
- 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
- 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
- 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
- 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
- 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
- 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
- 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
- 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
- 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
- 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
- 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
- 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
- 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
- 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
- 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
- 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
- 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
- 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
- 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
- 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
- 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
- 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
- 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
- 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
- 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
- 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
- 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
- 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
- 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
- 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
- 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
- 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
- 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
- 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
- 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
- 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
- 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
- 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
- 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
- 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
- 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
- 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
- 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
- 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
- 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
- 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
- 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
- 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
- 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
- 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
- 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
- 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
- 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
- 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
- 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
- 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
- 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
- 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
- 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
- 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
- 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
- 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
- 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
- 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
- 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
- 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
- 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
- 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
- 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
- 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
- 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
- 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
- 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
- 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
- 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
- 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
- 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
- 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
- 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
- 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
- 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
- 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
- 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
- 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
- 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
- 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
- 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
- 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
- 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
- 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
- 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
- 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
- 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
- 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
- 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
- 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
- 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
- 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
- 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
- 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
- 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
- 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
- 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
- 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
- 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
- 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
- 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
- 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
- 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
- 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
- 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
- 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
- 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
- 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
- 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
- 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
- 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
- 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
- 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
- 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
- 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
- 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
- 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
- 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
- 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
- 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
- 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
- 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
- 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
- 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
- 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
- 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
- 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
- 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
- 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
- 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
- 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
- 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
- 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
- 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
- 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
- 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
- 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
- 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
- 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
- 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
- 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
- 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
- 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
- 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
- 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
- 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
- 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
- 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
- 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
- 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
- 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
- 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
- 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
- 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
- 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
- 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
- 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
- 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
- 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
- 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
- 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
- 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
- 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
- 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
- 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
- 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
- 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
- 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
- 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
- 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
- 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
- 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
- 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
- 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
- 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
- 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
- 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
- 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
- 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
- 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
- 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
- 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
- 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
- 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
- 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
- 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
- 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
- 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
- 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
- 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
- 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
- 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
- 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
- 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
- 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
- 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
- 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
- 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
- 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
- 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
- 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
- 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
- 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
- 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
- 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
- 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
- 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
- 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
- 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
- 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
- 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
- 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
- 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
- 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
- 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
- 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
- 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
- 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
- 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
- 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
- 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
- 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
- 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
- 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
- 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
- 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
- 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
- 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
- 835584U, // <0,u,2,3>: Cost 0 copy LHS
- 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
- 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
- 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
- 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
- 835584U, // <0,u,2,u>: Cost 0 copy LHS
- 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
- 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
- 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
- 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
- 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
- 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
- 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
- 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
- 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
- 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
- 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
- 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
- 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
- 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
- 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
- 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
- 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
- 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
- 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
- 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
- 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
- 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
- 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
- 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
- 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
- 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
- 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
- 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
- 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
- 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
- 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
- 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
- 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
- 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
- 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
- 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
- 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
- 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
- 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
- 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
- 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
- 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
- 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
- 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
- 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
- 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
- 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
- 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
- 835584U, // <0,u,u,3>: Cost 0 copy LHS
- 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
- 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
- 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
- 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
- 835584U, // <0,u,u,u>: Cost 0 copy LHS
- 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
- 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
- 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
- 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
- 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
- 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
- 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
- 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
- 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
- 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
- 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
- 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
- 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
- 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
- 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
- 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
- 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
- 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
- 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
- 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
- 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
- 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
- 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
- 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
- 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
- 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
- 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
- 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
- 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
- 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
- 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
- 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
- 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
- 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
- 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
- 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
- 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
- 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
- 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
- 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
- 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
- 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
- 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
- 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
- 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
- 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
- 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
- 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
- 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
- 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
- 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
- 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
- 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
- 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
- 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
- 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
- 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
- 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
- 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
- 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
- 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
- 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
- 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
- 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
- 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
- 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
- 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
- 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
- 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
- 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
- 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
- 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
- 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
- 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
- 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
- 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
- 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
- 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
- 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
- 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
- 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
- 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
- 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
- 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
- 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
- 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
- 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
- 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
- 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
- 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
- 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
- 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
- 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
- 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
- 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
- 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
- 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
- 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
- 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
- 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
- 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
- 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
- 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
- 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
- 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
- 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
- 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
- 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
- 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
- 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
- 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
- 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
- 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
- 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
- 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
- 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
- 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
- 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
- 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
- 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
- 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
- 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
- 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
- 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
- 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
- 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
- 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
- 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
- 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
- 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
- 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
- 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
- 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
- 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
- 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
- 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
- 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
- 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
- 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
- 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
- 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
- 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
- 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
- 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
- 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
- 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
- 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
- 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
- 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
- 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
- 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
- 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
- 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
- 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
- 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
- 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
- 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
- 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
- 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
- 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
- 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
- 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
- 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
- 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
- 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
- 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
- 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
- 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
- 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
- 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
- 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
- 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
- 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
- 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
- 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
- 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
- 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
- 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
- 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
- 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
- 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
- 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
- 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
- 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
- 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
- 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
- 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
- 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
- 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
- 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
- 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
- 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
- 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
- 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
- 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
- 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
- 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
- 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
- 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
- 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
- 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
- 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
- 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
- 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
- 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
- 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
- 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
- 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
- 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
- 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
- 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
- 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
- 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
- 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
- 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
- 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
- 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
- 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
- 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
- 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
- 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
- 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
- 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
- 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
- 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
- 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
- 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
- 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
- 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
- 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
- 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
- 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
- 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
- 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
- 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
- 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
- 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
- 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
- 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
- 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
- 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
- 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
- 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
- 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
- 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
- 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
- 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
- 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
- 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
- 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
- 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
- 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
- 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
- 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
- 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
- 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
- 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
- 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
- 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
- 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
- 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
- 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
- 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
- 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
- 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
- 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
- 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
- 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
- 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
- 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
- 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
- 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
- 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
- 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
- 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
- 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
- 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
- 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
- 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
- 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
- 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
- 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
- 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
- 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
- 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
- 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
- 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
- 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
- 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
- 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
- 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
- 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
- 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
- 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
- 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
- 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
- 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
- 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
- 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
- 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
- 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
- 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
- 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
- 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
- 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
- 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
- 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
- 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
- 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
- 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
- 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
- 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
- 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
- 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
- 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
- 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
- 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
- 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
- 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
- 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
- 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
- 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
- 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
- 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
- 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
- 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
- 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
- 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
- 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
- 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
- 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
- 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
- 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
- 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
- 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
- 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
- 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
- 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
- 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
- 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
- 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
- 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
- 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
- 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
- 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
- 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
- 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
- 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
- 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
- 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
- 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
- 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
- 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
- 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
- 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
- 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
- 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
- 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
- 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
- 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
- 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
- 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
- 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
- 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
- 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
- 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
- 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
- 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
- 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
- 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
- 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
- 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
- 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
- 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
- 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
- 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
- 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
- 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
- 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
- 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
- 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
- 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
- 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
- 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
- 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
- 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
- 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
- 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
- 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
- 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
- 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
- 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
- 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
- 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
- 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
- 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
- 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
- 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
- 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
- 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
- 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
- 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
- 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
- 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
- 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
- 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
- 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
- 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
- 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
- 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
- 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
- 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
- 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
- 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
- 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
- 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
- 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
- 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
- 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
- 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
- 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
- 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
- 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
- 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
- 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
- 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
- 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
- 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
- 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
- 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
- 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
- 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
- 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
- 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
- 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
- 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
- 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
- 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
- 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
- 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
- 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
- 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
- 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
- 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
- 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
- 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
- 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
- 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
- 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
- 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
- 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
- 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
- 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
- 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
- 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
- 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
- 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
- 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
- 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
- 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
- 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
- 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
- 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
- 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
- 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
- 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
- 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
- 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
- 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
- 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
- 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
- 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
- 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
- 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
- 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
- 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
- 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
- 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
- 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
- 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
- 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
- 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
- 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
- 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
- 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
- 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
- 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
- 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
- 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
- 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
- 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
- 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
- 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
- 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
- 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
- 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
- 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
- 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
- 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
- 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
- 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
- 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
- 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
- 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
- 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
- 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
- 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
- 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
- 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
- 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
- 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
- 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
- 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
- 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
- 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
- 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
- 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
- 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
- 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
- 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
- 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
- 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
- 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
- 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
- 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
- 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
- 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
- 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
- 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
- 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
- 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
- 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
- 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
- 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
- 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
- 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
- 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
- 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
- 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
- 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
- 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
- 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
- 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
- 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
- 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
- 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
- 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
- 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
- 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
- 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
- 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
- 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
- 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
- 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
- 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
- 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
- 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
- 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
- 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
- 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
- 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
- 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
- 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
- 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
- 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
- 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
- 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
- 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
- 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
- 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
- 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
- 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
- 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
- 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
- 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
- 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
- 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
- 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
- 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
- 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
- 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
- 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
- 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
- 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
- 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
- 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
- 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
- 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
- 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
- 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
- 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
- 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
- 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
- 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
- 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
- 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
- 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
- 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
- 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
- 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
- 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
- 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
- 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
- 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
- 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
- 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
- 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
- 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
- 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
- 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
- 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
- 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
- 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
- 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
- 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
- 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
- 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
- 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
- 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
- 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
- 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
- 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
- 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
- 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
- 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
- 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
- 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
- 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
- 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
- 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
- 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
- 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
- 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
- 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
- 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
- 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
- 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
- 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
- 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
- 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
- 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
- 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
- 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
- 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
- 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
- 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
- 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
- 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
- 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
- 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
- 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
- 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
- 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
- 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
- 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
- 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
- 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
- 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
- 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
- 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
- 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
- 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
- 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
- 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
- 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
- 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
- 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
- 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
- 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
- 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
- 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
- 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
- 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
- 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
- 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
- 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
- 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
- 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
- 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
- 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
- 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
- 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
- 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
- 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
- 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
- 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
- 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
- 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
- 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
- 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
- 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
- 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
- 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
- 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
- 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
- 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
- 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
- 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
- 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
- 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
- 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
- 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
- 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
- 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
- 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
- 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
- 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
- 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
- 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
- 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
- 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
- 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
- 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
- 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
- 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
- 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
- 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
- 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
- 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
- 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
- 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
- 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
- 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
- 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
- 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
- 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
- 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
- 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
- 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
- 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
- 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
- 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
- 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
- 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
- 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
- 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
- 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
- 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
- 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
- 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
- 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
- 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
- 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
- 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
- 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
- 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
- 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
- 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
- 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
- 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
- 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
- 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
- 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
- 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
- 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
- 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
- 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
- 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
- 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
- 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
- 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
- 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
- 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
- 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
- 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
- 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
- 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
- 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
- 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
- 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
- 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
- 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
- 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
- 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
- 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
- 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
- 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
- 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
- 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
- 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
- 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
- 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
- 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
- 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
- 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
- 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
- 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
- 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
- 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
- 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
- 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
- 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
- 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
- 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
- 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
- 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
- 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
- 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
- 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
- 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
- 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
- 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
- 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
- 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
- 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
- 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
- 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
- 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
- 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
- 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
- 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
- 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
- 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
- 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
- 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
- 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
- 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
- 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
- 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
- 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
- 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
- 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
- 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
- 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
- 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
- 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
- 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
- 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
- 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
- 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
- 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
- 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
- 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
- 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
- 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
- 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
- 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
- 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
- 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
- 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
- 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
- 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
- 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
- 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
- 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
- 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
- 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
- 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
- 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
- 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
- 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
- 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
- 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
- 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
- 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
- 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
- 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
- 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
- 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
- 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
- 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
- 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
- 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
- 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
- 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
- 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
- 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
- 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
- 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
- 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
- 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
- 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
- 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
- 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
- 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
- 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
- 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
- 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
- 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
- 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
- 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
- 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
- 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
- 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
- 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
- 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
- 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
- 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
- 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
- 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
- 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
- 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
- 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
- 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
- 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
- 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
- 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
- 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
- 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
- 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
- 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
- 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
- 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
- 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
- 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
- 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
- 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
- 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
- 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
- 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
- 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
- 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
- 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
- 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
- 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
- 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
- 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
- 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
- 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
- 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
- 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
- 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
- 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
- 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
- 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
- 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
- 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
- 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
- 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
- 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
- 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
- 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
- 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
- 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
- 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
- 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
- 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
- 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
- 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
- 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
- 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
- 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
- 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
- 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
- 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
- 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
- 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
- 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
- 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
- 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
- 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
- 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
- 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
- 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
- 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
- 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
- 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
- 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
- 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
- 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
- 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
- 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
- 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
- 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
- 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
- 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
- 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
- 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
- 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
- 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
- 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
- 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
- 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
- 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
- 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
- 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
- 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
- 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
- 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
- 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
- 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
- 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
- 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
- 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
- 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
- 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
- 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
- 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
- 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
- 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
- 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
- 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
- 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
- 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
- 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
- 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
- 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
- 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
- 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
- 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
- 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
- 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
- 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
- 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
- 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
- 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
- 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
- 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
- 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
- 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
- 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
- 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
- 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
- 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
- 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
- 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
- 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
- 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
- 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
- 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
- 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
- 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
- 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
- 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
- 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
- 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
- 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
- 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
- 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
- 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
- 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
- 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
- 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
- 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
- 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
- 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
- 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
- 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
- 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
- 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
- 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
- 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
- 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
- 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
- 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
- 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
- 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
- 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
- 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
- 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
- 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
- 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
- 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
- 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
- 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
- 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
- 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
- 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
- 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
- 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
- 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
- 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
- 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
- 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
- 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
- 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
- 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
- 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
- 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
- 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
- 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
- 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
- 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
- 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
- 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
- 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
- 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
- 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
- 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
- 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
- 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
- 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
- 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
- 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
- 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
- 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
- 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
- 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
- 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
- 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
- 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
- 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
- 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
- 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
- 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
- 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
- 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
- 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
- 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
- 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
- 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
- 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
- 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
- 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
- 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
- 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
- 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
- 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
- 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
- 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
- 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
- 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
- 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
- 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
- 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
- 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
- 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
- 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
- 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
- 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
- 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
- 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
- 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
- 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
- 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
- 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
- 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
- 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
- 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
- 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
- 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
- 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
- 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
- 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
- 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
- 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
- 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
- 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
- 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
- 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
- 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
- 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
- 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
- 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
- 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
- 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
- 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
- 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
- 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
- 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
- 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
- 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
- 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
- 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
- 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
- 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
- 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
- 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
- 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
- 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
- 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
- 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
- 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
- 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
- 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
- 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
- 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
- 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
- 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
- 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
- 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
- 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
- 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
- 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
- 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
- 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
- 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
- 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
- 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
- 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
- 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
- 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
- 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
- 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
- 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
- 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
- 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
- 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
- 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
- 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
- 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
- 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
- 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
- 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
- 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
- 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
- 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
- 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
- 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
- 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
- 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
- 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
- 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
- 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
- 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
- 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
- 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
- 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
- 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
- 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
- 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
- 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
- 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
- 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
- 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
- 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
- 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
- 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
- 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
- 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
- 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
- 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
- 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
- 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
- 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
- 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
- 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
- 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
- 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
- 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
- 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
- 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
- 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
- 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
- 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
- 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
- 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
- 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
- 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
- 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
- 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
- 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
- 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
- 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
- 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
- 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
- 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
- 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
- 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
- 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
- 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
- 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
- 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
- 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
- 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
- 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
- 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
- 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
- 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
- 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
- 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
- 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
- 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
- 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
- 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
- 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
- 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
- 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
- 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
- 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
- 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
- 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
- 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
- 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
- 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
- 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
- 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
- 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
- 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
- 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
- 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
- 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
- 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
- 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
- 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
- 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
- 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
- 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
- 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
- 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
- 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
- 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
- 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
- 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
- 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
- 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
- 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
- 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
- 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
- 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
- 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
- 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
- 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
- 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
- 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
- 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
- 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
- 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
- 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
- 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
- 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
- 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
- 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
- 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
- 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
- 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
- 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
- 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
- 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
- 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
- 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
- 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
- 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
- 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
- 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
- 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
- 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
- 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
- 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
- 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
- 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
- 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
- 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
- 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
- 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
- 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
- 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
- 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
- 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
- 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
- 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
- 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
- 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
- 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
- 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
- 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
- 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
- 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
- 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
- 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
- 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
- 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
- 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
- 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
- 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
- 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
- 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
- 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
- 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
- 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
- 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
- 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
- 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
- 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
- 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
- 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
- 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
- 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
- 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
- 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
- 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
- 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
- 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
- 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
- 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
- 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
- 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
- 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
- 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
- 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
- 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
- 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
- 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
- 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
- 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
- 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
- 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
- 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
- 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
- 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
- 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
- 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
- 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
- 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
- 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
- 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
- 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
- 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
- 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
- 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
- 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
- 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
- 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
- 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
- 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
- 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
- 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
- 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
- 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
- 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
- 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
- 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
- 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
- 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
- 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
- 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
- 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
- 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
- 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
- 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
- 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
- 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
- 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
- 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
- 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
- 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
- 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
- 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
- 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
- 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
- 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
- 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
- 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
- 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
- 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
- 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
- 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
- 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
- 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
- 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
- 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
- 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
- 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
- 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
- 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
- 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
- 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
- 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
- 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
- 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
- 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
- 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
- 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
- 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
- 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
- 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
- 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
- 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
- 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
- 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
- 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
- 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
- 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
- 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
- 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
- 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
- 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
- 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
- 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
- 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
- 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
- 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
- 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
- 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
- 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
- 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
- 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
- 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
- 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
- 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
- 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
- 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
- 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
- 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
- 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
- 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
- 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
- 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
- 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
- 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
- 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
- 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
- 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
- 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
- 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
- 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
- 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
- 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
- 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
- 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
- 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
- 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
- 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
- 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
- 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
- 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
- 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
- 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
- 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
- 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
- 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
- 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
- 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
- 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
- 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
- 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
- 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
- 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
- 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
- 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
- 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
- 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
- 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
- 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
- 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
- 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
- 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
- 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
- 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
- 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
- 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
- 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
- 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
- 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
- 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
- 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
- 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
- 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
- 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
- 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
- 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
- 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
- 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
- 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
- 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
- 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
- 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
- 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
- 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
- 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
- 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
- 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
- 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
- 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
- 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
- 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
- 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
- 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
- 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
- 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
- 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
- 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
- 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
- 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
- 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
- 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
- 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
- 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
- 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
- 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
- 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
- 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
- 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
- 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
- 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
- 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
- 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
- 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
- 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
- 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
- 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
- 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
- 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
- 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
- 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
- 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
- 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
- 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
- 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
- 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
- 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
- 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
- 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
- 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
- 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
- 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
- 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
- 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
- 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
- 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
- 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
- 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
- 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
- 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
- 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
- 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
- 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
- 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
- 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
- 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
- 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
- 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
- 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
- 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
- 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
- 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
- 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
- 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
- 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
- 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
- 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
- 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
- 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
- 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
- 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
- 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
- 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
- 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
- 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
- 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
- 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
- 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
- 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
- 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
- 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
- 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
- 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
- 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
- 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
- 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
- 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
- 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
- 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
- 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
- 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
- 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
- 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
- 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
- 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
- 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
- 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
- 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
- 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
- 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
- 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
- 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
- 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
- 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
- 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
- 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
- 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
- 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
- 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
- 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
- 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
- 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
- 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
- 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
- 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
- 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
- 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
- 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
- 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
- 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
- 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
- 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
- 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
- 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
- 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
- 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
- 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
- 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
- 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
- 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
- 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
- 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
- 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
- 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
- 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
- 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
- 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
- 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
- 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
- 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
- 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
- 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
- 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
- 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
- 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
- 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
- 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
- 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
- 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
- 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
- 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
- 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
- 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
- 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
- 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
- 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
- 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
- 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
- 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
- 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
- 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
- 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
- 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
- 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
- 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
- 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
- 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
- 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
- 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
- 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
- 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
- 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
- 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
- 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
- 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
- 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
- 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
- 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
- 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
- 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
- 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
- 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
- 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
- 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
- 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
- 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
- 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
- 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
- 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
- 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
- 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
- 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
- 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
- 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
- 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
- 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
- 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
- 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
- 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
- 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
- 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
- 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
- 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
- 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
- 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
- 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
- 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
- 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
- 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
- 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
- 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
- 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
- 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
- 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
- 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
- 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
- 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
- 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
- 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
- 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
- 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
- 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
- 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
- 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
- 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
- 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
- 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
- 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
- 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
- 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
- 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
- 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
- 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
- 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
- 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
- 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
- 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
- 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
- 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
- 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
- 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
- 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
- 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
- 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
- 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
- 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
- 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
- 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
- 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
- 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
- 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
- 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
- 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
- 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
- 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
- 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
- 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
- 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
- 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
- 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
- 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
- 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
- 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
- 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
- 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
- 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
- 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
- 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
- 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
- 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
- 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
- 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
- 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
- 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
- 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
- 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
- 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
- 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
- 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
- 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
- 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
- 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
- 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
- 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
- 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
- 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
- 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
- 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
- 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
- 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
- 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
- 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
- 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
- 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
- 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
- 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
- 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
- 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
- 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
- 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
- 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
- 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
- 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
- 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
- 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
- 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
- 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
- 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
- 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
- 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
- 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
- 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
- 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
- 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
- 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
- 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
- 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
- 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
- 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
- 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
- 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
- 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
- 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
- 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
- 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
- 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
- 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
- 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
- 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
- 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
- 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
- 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
- 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
- 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
- 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
- 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
- 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
- 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
- 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
- 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
- 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
- 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
- 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
- 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
- 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
- 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
- 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
- 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
- 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
- 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
- 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
- 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
- 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
- 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
- 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
- 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
- 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
- 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
- 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
- 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
- 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
- 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
- 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
- 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
- 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
- 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
- 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
- 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
- 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
- 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
- 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
- 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
- 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
- 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
- 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
- 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
- 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
- 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
- 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
- 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
- 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
- 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
- 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
- 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
- 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
- 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
- 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
- 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
- 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
- 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
- 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
- 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
- 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
- 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
- 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
- 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
- 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
- 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
- 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
- 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
- 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
- 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
- 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
- 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
- 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
- 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
- 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
- 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
- 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
- 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
- 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
- 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
- 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
- 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
- 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
- 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
- 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
- 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
- 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
- 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
- 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
- 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
- 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
- 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
- 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
- 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
- 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
- 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
- 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
- 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
- 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
- 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
- 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
- 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
- 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
- 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
- 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
- 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
- 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
- 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
- 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
- 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
- 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
- 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
- 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
- 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
- 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
- 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
- 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
- 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
- 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
- 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
- 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
- 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
- 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
- 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
- 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
- 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
- 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
- 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
- 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
- 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
- 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
- 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
- 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
- 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
- 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
- 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
- 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
- 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
- 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
- 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
- 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
- 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
- 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
- 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
- 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
- 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
- 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
- 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
- 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
- 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
- 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
- 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
- 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
- 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
- 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
- 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
- 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
- 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
- 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
- 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
- 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
- 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
- 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
- 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
- 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
- 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
- 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
- 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
- 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
- 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
- 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
- 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
- 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
- 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
- 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
- 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
- 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
- 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
- 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
- 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
- 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
- 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
- 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
- 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
- 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
- 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
- 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
- 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
- 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
- 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
- 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
- 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
- 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
- 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
- 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
- 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
- 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
- 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
- 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
- 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
- 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
- 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
- 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
- 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
- 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
- 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
- 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
- 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
- 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
- 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
- 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
- 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
- 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
- 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
- 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
- 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
- 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
- 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
- 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
- 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
- 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
- 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
- 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
- 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
- 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
- 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
- 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
- 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
- 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
- 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
- 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
- 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
- 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
- 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
- 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
- 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
- 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
- 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
- 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
- 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
- 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
- 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
- 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
- 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
- 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
- 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
- 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
- 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
- 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
- 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
- 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
- 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
- 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
- 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
- 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
- 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
- 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
- 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
- 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
- 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
- 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
- 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
- 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
- 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
- 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
- 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
- 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
- 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
- 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
- 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
- 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
- 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
- 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
- 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
- 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
- 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
- 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
- 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
- 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
- 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
- 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
- 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
- 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
- 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
- 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
- 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
- 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
- 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
- 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
- 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
- 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
- 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
- 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
- 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
- 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
- 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
- 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
- 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
- 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
- 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
- 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
- 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
- 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
- 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
- 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
- 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
- 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
- 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
- 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
- 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
- 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
- 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
- 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
- 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
- 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
- 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
- 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
- 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
- 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
- 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
- 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
- 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
- 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
- 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
- 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
- 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
- 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
- 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
- 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
- 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
- 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
- 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
- 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
- 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
- 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
- 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
- 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
- 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
- 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
- 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
- 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
- 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
- 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
- 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
- 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
- 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
- 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
- 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
- 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
- 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
- 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
- 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
- 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
- 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
- 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
- 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
- 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
- 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
- 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
- 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
- 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
- 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
- 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
- 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
- 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
- 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
- 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
- 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
- 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
- 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
- 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
- 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
- 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
- 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
- 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
- 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
- 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
- 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
- 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
- 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
- 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
- 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
- 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
- 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
- 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
- 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
- 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
- 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
- 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
- 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
- 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
- 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
- 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
- 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
- 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
- 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
- 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
- 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
- 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
- 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
- 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
- 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
- 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
- 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
- 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
- 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
- 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
- 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
- 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
- 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
- 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
- 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
- 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
- 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
- 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
- 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
- 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
- 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
- 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
- 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
- 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
- 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
- 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
- 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
- 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
- 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
- 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
- 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
- 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
- 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
- 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
- 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
- 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
- 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
- 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
- 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
- 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
- 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
- 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
- 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
- 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
- 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
- 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
- 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
- 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
- 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
- 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
- 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
- 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
- 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
- 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
- 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
- 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
- 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
- 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
- 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
- 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
- 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
- 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
- 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
- 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
- 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
- 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
- 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
- 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
- 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
- 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
- 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
- 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
- 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
- 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
- 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
- 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
- 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
- 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
- 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
- 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
- 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
- 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
- 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
- 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
- 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
- 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
- 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
- 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
- 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
- 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
- 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
- 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
- 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
- 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
- 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
- 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
- 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
- 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
- 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
- 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
- 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
- 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
- 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
- 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
- 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
- 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
- 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
- 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
- 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
- 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
- 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
- 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
- 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
- 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
- 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
- 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
- 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
- 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
- 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
- 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
- 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
- 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
- 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
- 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
- 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
- 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
- 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
- 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
- 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
- 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
- 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
- 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
- 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
- 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
- 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
- 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
- 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
- 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
- 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
- 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
- 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
- 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
- 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
- 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
- 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
- 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
- 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
- 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
- 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
- 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
- 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
- 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
- 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
- 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
- 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
- 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
- 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
- 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
- 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
- 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
- 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
- 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
- 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
- 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
- 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
- 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
- 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
- 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
- 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
- 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
- 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
- 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
- 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
- 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
- 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
- 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
- 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
- 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
- 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
- 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
- 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
- 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
- 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
- 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
- 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
- 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
- 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
- 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
- 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
- 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
- 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
- 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
- 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
- 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
- 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
- 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
- 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
- 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
- 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
- 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
- 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
- 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
- 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
- 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
- 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
- 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
- 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
- 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
- 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
- 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
- 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
- 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
- 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
- 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
- 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
- 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
- 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
- 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
- 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
- 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
- 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
- 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
- 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
- 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
- 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
- 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
- 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
- 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
- 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
- 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
- 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
- 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
- 27705344U, // <4,5,6,7>: Cost 0 copy RHS
- 27705344U, // <4,5,6,u>: Cost 0 copy RHS
- 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
- 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
- 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
- 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
- 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
- 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
- 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
- 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
- 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
- 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
- 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
- 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
- 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
- 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
- 27705344U, // <4,5,u,7>: Cost 0 copy RHS
- 27705344U, // <4,5,u,u>: Cost 0 copy RHS
- 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
- 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
- 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
- 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
- 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
- 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
- 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
- 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
- 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
- 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
- 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
- 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
- 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
- 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
- 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
- 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
- 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
- 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
- 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
- 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
- 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
- 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
- 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
- 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
- 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
- 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
- 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
- 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
- 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
- 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
- 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
- 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
- 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
- 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
- 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
- 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
- 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
- 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
- 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
- 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
- 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
- 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
- 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
- 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
- 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
- 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
- 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
- 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
- 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
- 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
- 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
- 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
- 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
- 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
- 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
- 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
- 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
- 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
- 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
- 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
- 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
- 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
- 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
- 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
- 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
- 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
- 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
- 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
- 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
- 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
- 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
- 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
- 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
- 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
- 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
- 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
- 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
- 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
- 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
- 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
- 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
- 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
- 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
- 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
- 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
- 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
- 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
- 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
- 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
- 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
- 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
- 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
- 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
- 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
- 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
- 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
- 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
- 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
- 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
- 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
- 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
- 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
- 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
- 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
- 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
- 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
- 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
- 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
- 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
- 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
- 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
- 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
- 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
- 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
- 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
- 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
- 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
- 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
- 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
- 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
- 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
- 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
- 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
- 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
- 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
- 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
- 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
- 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
- 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
- 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
- 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
- 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
- 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
- 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
- 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
- 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
- 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
- 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
- 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
- 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
- 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
- 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
- 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
- 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
- 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
- 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
- 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
- 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
- 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
- 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
- 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
- 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
- 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
- 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
- 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
- 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
- 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
- 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
- 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
- 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
- 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
- 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
- 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
- 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
- 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
- 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
- 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
- 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
- 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
- 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
- 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
- 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
- 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
- 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
- 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
- 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
- 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
- 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
- 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
- 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
- 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
- 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
- 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
- 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
- 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
- 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
- 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
- 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
- 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
- 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
- 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
- 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
- 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
- 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
- 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
- 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
- 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
- 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
- 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
- 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
- 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
- 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
- 27705344U, // <4,u,6,7>: Cost 0 copy RHS
- 27705344U, // <4,u,6,u>: Cost 0 copy RHS
- 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
- 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
- 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
- 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
- 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
- 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
- 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
- 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
- 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
- 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
- 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 27705344U, // <4,u,u,7>: Cost 0 copy RHS
- 27705344U, // <4,u,u,u>: Cost 0 copy RHS
- 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
- 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
- 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
- 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
- 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
- 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
- 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
- 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
- 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
- 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
- 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
- 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
- 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
- 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
- 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
- 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
- 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
- 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
- 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
- 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
- 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
- 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
- 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
- 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
- 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
- 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
- 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
- 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
- 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
- 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
- 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
- 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
- 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
- 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
- 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
- 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
- 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
- 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
- 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
- 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
- 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
- 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
- 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
- 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
- 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
- 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
- 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
- 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
- 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
- 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
- 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
- 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
- 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
- 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
- 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
- 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
- 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
- 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
- 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
- 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
- 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
- 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
- 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
- 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
- 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
- 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
- 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
- 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
- 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
- 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
- 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
- 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
- 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
- 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
- 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
- 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
- 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
- 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
- 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
- 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
- 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
- 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
- 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
- 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
- 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
- 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
- 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
- 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
- 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
- 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
- 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
- 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
- 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
- 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
- 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
- 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
- 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
- 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
- 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
- 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
- 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
- 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
- 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
- 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
- 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
- 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
- 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
- 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
- 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
- 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
- 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
- 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
- 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
- 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
- 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
- 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
- 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
- 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
- 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
- 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
- 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
- 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
- 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
- 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
- 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
- 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
- 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
- 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
- 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
- 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
- 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
- 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
- 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
- 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
- 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
- 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
- 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
- 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
- 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
- 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
- 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
- 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
- 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
- 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
- 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
- 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
- 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
- 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
- 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
- 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
- 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
- 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
- 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
- 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
- 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
- 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
- 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
- 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
- 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
- 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
- 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
- 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
- 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
- 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
- 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
- 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
- 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
- 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
- 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
- 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
- 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
- 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
- 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
- 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
- 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
- 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
- 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
- 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
- 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
- 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
- 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
- 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
- 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
- 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
- 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
- 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
- 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
- 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
- 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
- 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
- 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
- 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
- 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
- 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
- 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
- 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
- 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
- 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
- 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
- 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
- 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
- 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
- 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
- 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
- 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
- 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
- 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
- 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
- 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
- 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
- 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
- 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
- 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
- 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
- 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
- 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
- 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
- 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
- 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
- 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
- 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
- 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
- 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
- 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
- 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
- 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
- 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
- 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
- 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
- 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
- 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
- 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
- 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
- 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
- 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
- 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
- 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
- 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
- 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
- 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
- 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
- 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
- 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
- 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
- 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
- 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
- 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
- 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
- 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
- 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
- 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
- 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
- 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
- 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
- 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
- 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
- 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
- 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
- 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
- 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
- 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
- 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
- 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
- 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
- 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
- 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
- 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
- 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
- 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
- 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
- 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
- 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
- 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
- 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
- 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
- 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
- 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
- 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
- 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
- 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
- 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
- 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
- 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
- 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
- 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
- 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
- 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
- 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
- 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
- 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
- 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
- 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
- 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
- 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
- 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
- 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
- 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
- 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
- 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
- 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
- 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
- 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
- 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
- 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
- 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
- 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
- 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
- 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
- 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
- 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
- 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
- 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
- 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
- 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
- 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
- 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
- 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
- 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
- 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
- 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
- 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
- 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
- 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
- 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
- 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
- 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
- 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
- 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
- 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
- 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
- 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
- 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
- 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
- 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
- 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
- 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
- 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
- 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
- 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
- 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
- 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
- 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
- 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
- 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
- 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
- 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
- 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
- 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
- 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
- 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
- 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
- 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
- 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
- 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
- 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
- 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
- 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
- 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
- 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
- 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
- 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
- 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
- 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
- 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
- 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
- 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
- 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
- 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
- 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
- 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
- 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
- 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
- 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
- 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
- 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
- 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
- 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
- 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
- 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
- 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
- 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
- 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
- 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
- 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
- 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
- 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
- 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
- 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
- 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
- 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
- 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
- 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
- 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
- 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
- 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
- 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
- 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
- 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
- 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
- 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
- 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
- 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
- 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
- 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
- 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
- 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
- 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
- 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
- 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
- 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
- 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
- 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
- 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
- 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
- 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
- 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
- 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
- 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
- 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
- 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
- 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
- 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
- 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
- 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
- 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
- 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
- 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
- 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
- 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
- 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
- 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
- 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
- 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
- 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
- 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
- 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
- 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
- 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
- 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
- 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
- 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
- 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
- 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
- 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
- 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
- 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
- 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
- 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
- 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
- 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
- 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
- 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
- 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
- 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
- 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
- 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
- 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
- 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
- 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
- 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
- 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
- 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
- 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
- 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
- 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
- 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
- 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
- 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
- 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
- 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
- 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
- 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
- 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
- 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
- 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
- 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
- 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
- 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
- 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
- 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
- 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
- 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
- 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
- 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
- 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
- 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
- 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
- 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
- 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
- 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
- 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
- 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
- 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
- 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
- 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
- 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
- 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
- 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
- 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
- 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
- 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
- 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
- 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
- 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
- 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
- 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
- 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
- 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
- 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
- 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
- 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
- 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
- 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
- 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
- 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
- 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
- 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
- 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
- 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
- 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
- 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
- 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
- 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
- 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
- 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
- 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
- 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
- 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
- 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
- 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
- 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
- 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
- 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
- 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
- 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
- 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
- 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
- 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
- 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
- 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
- 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
- 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
- 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
- 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
- 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
- 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
- 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
- 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
- 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
- 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
- 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
- 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
- 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
- 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
- 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
- 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
- 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
- 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
- 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
- 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
- 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
- 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
- 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
- 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
- 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
- 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
- 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
- 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
- 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
- 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
- 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
- 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
- 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
- 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
- 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
- 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
- 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
- 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
- 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
- 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
- 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
- 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
- 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
- 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
- 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
- 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
- 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
- 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
- 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
- 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
- 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
- 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
- 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
- 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
- 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
- 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
- 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
- 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
- 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
- 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
- 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
- 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
- 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
- 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
- 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
- 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
- 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
- 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
- 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
- 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
- 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
- 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
- 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
- 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
- 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
- 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
- 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
- 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
- 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
- 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
- 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
- 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
- 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
- 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
- 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
- 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
- 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
- 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
- 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
- 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
- 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
- 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
- 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
- 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
- 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
- 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
- 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
- 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
- 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
- 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
- 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
- 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
- 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
- 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
- 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
- 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
- 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
- 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
- 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
- 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
- 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
- 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
- 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
- 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
- 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
- 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
- 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
- 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
- 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
- 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
- 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
- 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
- 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
- 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
- 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
- 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
- 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
- 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
- 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
- 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
- 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
- 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
- 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
- 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
- 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
- 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
- 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
- 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
- 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
- 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
- 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
- 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
- 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
- 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
- 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
- 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
- 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
- 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
- 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
- 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
- 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
- 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
- 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
- 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
- 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
- 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
- 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
- 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
- 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
- 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
- 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
- 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
- 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
- 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
- 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
- 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
- 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
- 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
- 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
- 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
- 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
- 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
- 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
- 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
- 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
- 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
- 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
- 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
- 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
- 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
- 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
- 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
- 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
- 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
- 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
- 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
- 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
- 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
- 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
- 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
- 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
- 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
- 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
- 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
- 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
- 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
- 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
- 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
- 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
- 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
- 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
- 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
- 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
- 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
- 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
- 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
- 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
- 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
- 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
- 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
- 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
- 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
- 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
- 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
- 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
- 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
- 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
- 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
- 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
- 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
- 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
- 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
- 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
- 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
- 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
- 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
- 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
- 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
- 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
- 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
- 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
- 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
- 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
- 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
- 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
- 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
- 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
- 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
- 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
- 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
- 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
- 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
- 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
- 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
- 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
- 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
- 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
- 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
- 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
- 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
- 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
- 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
- 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
- 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
- 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
- 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
- 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
- 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
- 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
- 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
- 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
- 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
- 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
- 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
- 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
- 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
- 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
- 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
- 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
- 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
- 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
- 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
- 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
- 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
- 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
- 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
- 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
- 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
- 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
- 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
- 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
- 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
- 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
- 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
- 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
- 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
- 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
- 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
- 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
- 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
- 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
- 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
- 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
- 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
- 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
- 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
- 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
- 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
- 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
- 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
- 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
- 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
- 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
- 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
- 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
- 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
- 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
- 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
- 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
- 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
- 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
- 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
- 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
- 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
- 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
- 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
- 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
- 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
- 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
- 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
- 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
- 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
- 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
- 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
- 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
- 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
- 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
- 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
- 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
- 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
- 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
- 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
- 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
- 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
- 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
- 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
- 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
- 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
- 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
- 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
- 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
- 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
- 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
- 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
- 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
- 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
- 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
- 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
- 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
- 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
- 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
- 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
- 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
- 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
- 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
- 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
- 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
- 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
- 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
- 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
- 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
- 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
- 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
- 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
- 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
- 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
- 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
- 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
- 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
- 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
- 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
- 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
- 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
- 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
- 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
- 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
- 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
- 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
- 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
- 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
- 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
- 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
- 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
- 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
- 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
- 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
- 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
- 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
- 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
- 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
- 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
- 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
- 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
- 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
- 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
- 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
- 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
- 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
- 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
- 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
- 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
- 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
- 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
- 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
- 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
- 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
- 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
- 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
- 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
- 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
- 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
- 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
- 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
- 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
- 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
- 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
- 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
- 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
- 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
- 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
- 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
- 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
- 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
- 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
- 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
- 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
- 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
- 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
- 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
- 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
- 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
- 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
- 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
- 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
- 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
- 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
- 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
- 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
- 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
- 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
- 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
- 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
- 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
- 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
- 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
- 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
- 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
- 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
- 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
- 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
- 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
- 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
- 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
- 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
- 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
- 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
- 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
- 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
- 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
- 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
- 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
- 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
- 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
- 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
- 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
- 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
- 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
- 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
- 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
- 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
- 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
- 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
- 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
- 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
- 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
- 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
- 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
- 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
- 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
- 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
- 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
- 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
- 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
- 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
- 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
- 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
- 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
- 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
- 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
- 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
- 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
- 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
- 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
- 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
- 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
- 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
- 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
- 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
- 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
- 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
- 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
- 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
- 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
- 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
- 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
- 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
- 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
- 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
- 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
- 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
- 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
- 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
- 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
- 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
- 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
- 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
- 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
- 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
- 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
- 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
- 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
- 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
- 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
- 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
- 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
- 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
- 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
- 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
- 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
- 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
- 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
- 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
- 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
- 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
- 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
- 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
- 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
- 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
- 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
- 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
- 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
- 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
- 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
- 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
- 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
- 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
- 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
- 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
- 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
- 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
- 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
- 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
- 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
- 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
- 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
- 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
- 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
- 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
- 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
- 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
- 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
- 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
- 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
- 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
- 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
- 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
- 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
- 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
- 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
- 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
- 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
- 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
- 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
- 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
- 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
- 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
- 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
- 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
- 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
- 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
- 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
- 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
- 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
- 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
- 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
- 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
- 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
- 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
- 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
- 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
- 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
- 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
- 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
- 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
- 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
- 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
- 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
- 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
- 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
- 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
- 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
- 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
- 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
- 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
- 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
- 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
- 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
- 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
- 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
- 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
- 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
- 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
- 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
- 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
- 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
- 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
- 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
- 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
- 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
- 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
- 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
- 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
- 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
- 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
- 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
- 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
- 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
- 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
- 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
- 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
- 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
- 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
- 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
- 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
- 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
- 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
- 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
- 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
- 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
- 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
- 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
- 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
- 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
- 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
- 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
- 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
- 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
- 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
- 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
- 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
- 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
- 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
- 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
- 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
- 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
- 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
- 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
- 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
- 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
- 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
- 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
- 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
- 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
- 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
- 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
- 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
- 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
- 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
- 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
- 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
- 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
- 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
- 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
- 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
- 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
- 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
- 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
- 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
- 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
- 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
- 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
- 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
- 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
- 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
- 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
- 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
- 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
- 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
- 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
- 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
- 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
- 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
- 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
- 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
- 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
- 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
- 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
- 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
- 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
- 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
- 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
- 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
- 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
- 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
- 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
- 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
- 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
- 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
- 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
- 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
- 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
- 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
- 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
- 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
- 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
- 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
- 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
- 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
- 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
- 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
- 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
- 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
- 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
- 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
- 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
- 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
- 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
- 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
- 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
- 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
- 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
- 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
- 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
- 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
- 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
- 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
- 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
- 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
- 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
- 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
- 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
- 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
- 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
- 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
- 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
- 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
- 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
- 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
- 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
- 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
- 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
- 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
- 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
- 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
- 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
- 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
- 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
- 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
- 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
- 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
- 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
- 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
- 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
- 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
- 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
- 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
- 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
- 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
- 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
- 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
- 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
- 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
- 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
- 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
- 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
- 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
- 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
- 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
- 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
- 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
- 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
- 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
- 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
- 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
- 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
- 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
- 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
- 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
- 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
- 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
- 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
- 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
- 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
- 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
- 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
- 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
- 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
- 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
- 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
- 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
- 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
- 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
- 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
- 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
- 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
- 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
- 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
- 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
- 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
- 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
- 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
- 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
- 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
- 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
- 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
- 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
- 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
- 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
- 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
- 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
- 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
- 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
- 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
- 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
- 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
- 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
- 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
- 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
- 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
- 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
- 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
- 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
- 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
- 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
- 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
- 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
- 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
- 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
- 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
- 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
- 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
- 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
- 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
- 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
- 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
- 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
- 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
- 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
- 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
- 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
- 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
- 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
- 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
- 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
- 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
- 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
- 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
- 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
- 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
- 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
- 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
- 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
- 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
- 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
- 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
- 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
- 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
- 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
- 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
- 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
- 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
- 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
- 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
- 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
- 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
- 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
- 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
- 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
- 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
- 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
- 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
- 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
- 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
- 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
- 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
- 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
- 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
- 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
- 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
- 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
- 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
- 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
- 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
- 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
- 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
- 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
- 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
- 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
- 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
- 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
- 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
- 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
- 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
- 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
- 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
- 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
- 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
- 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
- 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
- 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
- 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
- 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
- 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
- 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
- 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
- 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
- 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
- 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
- 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
- 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
- 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
- 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
- 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
- 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
- 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
- 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
- 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
- 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
- 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
- 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
- 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
- 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
- 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
- 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
- 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
- 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
- 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
- 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
- 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
- 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
- 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
- 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
- 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
- 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
- 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
- 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
- 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
- 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
- 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
- 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
- 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
- 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
- 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
- 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
- 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
- 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
- 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
- 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
- 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
- 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
- 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
- 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
- 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
- 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
- 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
- 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
- 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
- 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
- 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
- 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
- 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
- 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
- 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
- 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
- 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
- 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
- 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
- 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
- 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
- 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
- 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
- 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
- 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
- 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
- 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
- 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
- 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
- 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
- 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
- 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
- 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
- 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
- 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
- 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
- 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
- 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
- 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
- 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
- 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
- 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
- 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
- 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
- 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
- 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
- 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
- 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
- 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
- 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
- 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
- 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
- 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
- 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
- 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
- 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
- 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
- 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
- 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
- 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
- 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
- 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
- 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
- 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
- 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
- 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
- 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
- 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
- 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
- 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
- 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
- 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
- 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
- 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
- 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
- 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
- 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
- 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
- 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
- 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
- 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
- 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
- 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
- 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
- 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
- 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
- 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
- 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
- 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
- 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
- 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
- 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
- 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
- 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
- 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
- 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
- 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
- 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
- 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
- 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
- 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
- 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
- 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
- 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
- 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
- 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
- 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
- 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
- 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
- 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
- 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
- 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
- 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
- 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
- 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
- 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
- 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
- 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
- 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
- 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
- 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
- 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
- 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
- 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
- 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
- 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
- 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
- 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
- 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
- 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
- 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
- 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
- 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
- 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
- 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
- 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
- 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
- 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
- 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
- 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
- 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
- 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
- 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
- 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
- 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
- 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
- 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
- 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
- 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
- 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
- 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
- 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
- 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
- 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
- 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
- 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
- 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
- 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
- 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
- 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
- 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
- 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
- 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
- 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
- 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
- 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
- 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
- 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
- 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
- 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
- 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
- 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
- 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
- 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
- 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
- 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
- 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
- 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
- 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
- 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
- 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
- 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
- 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
- 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
- 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
- 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
- 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
- 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
- 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
- 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
- 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
- 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
- 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
- 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
- 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
- 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
- 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
- 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
- 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
- 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
- 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
- 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
- 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
- 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
- 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
- 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
- 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
- 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
- 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
- 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
- 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
- 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
- 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
- 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
- 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
- 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
- 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
- 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
- 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
- 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
- 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
- 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
- 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
- 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
- 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
- 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
- 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
- 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
- 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
- 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
- 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
- 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
- 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
- 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
- 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
- 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
- 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
- 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
- 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
- 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
- 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
- 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
- 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
- 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
- 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
- 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
- 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
- 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
- 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
- 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
- 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
- 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
- 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
- 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
- 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
- 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
- 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
- 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
- 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
- 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
- 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
- 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
- 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
- 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
- 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
- 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
- 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
- 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
- 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
- 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
- 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
- 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
- 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
- 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
- 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
- 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
- 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
- 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
- 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
- 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
- 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
- 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
- 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
- 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
- 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
- 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
- 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
- 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
- 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
- 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
- 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
- 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
- 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
- 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
- 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
- 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
- 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
- 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
- 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
- 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
- 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
- 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
- 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
- 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
- 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
- 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
- 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
- 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
- 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
- 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
- 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
- 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
- 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
- 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
- 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
- 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
- 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
- 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
- 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
- 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
- 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
- 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
- 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
- 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
- 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
- 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
- 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
- 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
- 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
- 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
- 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
- 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
- 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
- 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
- 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
- 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
- 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
- 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
- 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
- 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
- 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
- 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
- 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
- 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
- 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
- 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
- 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
- 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
- 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
- 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
- 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
- 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
- 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
- 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
- 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
- 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
- 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
- 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
- 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
- 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
- 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
- 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
- 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
- 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
- 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
- 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
- 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
- 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
- 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
- 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
- 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
- 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
- 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
- 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
- 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
- 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
- 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
- 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
- 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
- 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
- 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
- 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
- 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
- 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
- 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
- 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
- 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
- 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
- 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
- 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
- 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
- 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
- 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
- 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
- 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
- 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
- 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
- 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
- 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
- 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
- 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
- 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
- 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
- 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
- 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
- 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
- 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
- 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
- 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
- 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
- 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
- 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
- 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
- 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
- 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
- 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
- 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
- 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
- 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
- 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
- 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
- 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
- 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
- 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
- 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
- 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
- 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
- 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
- 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
- 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
- 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
- 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
- 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
- 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
- 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
- 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
- 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
- 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
- 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
- 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
- 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
- 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
- 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
- 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
- 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
- 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
- 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
- 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
- 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
- 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
- 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
- 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
- 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
- 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
- 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
- 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
- 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
- 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
- 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
- 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
- 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
- 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
- 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
- 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
- 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
- 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
- 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
- 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
- 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
- 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
- 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
- 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
- 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
- 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
- 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
- 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
- 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
- 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
- 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
- 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
- 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
- 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
- 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
- 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
- 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
- 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
- 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
- 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
- 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
- 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
- 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
- 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
- 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
- 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
- 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
- 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
- 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
- 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
- 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
- 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
- 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
- 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
- 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
- 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
- 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
- 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
- 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
- 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
- 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
- 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
- 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
- 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
- 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
- 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
- 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
- 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
- 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
- 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
- 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
- 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
- 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
- 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
- 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
- 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
- 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
- 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
- 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
- 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
- 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
- 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
- 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
- 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
- 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
- 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
- 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
- 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
- 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
- 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
- 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
- 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
- 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
- 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
- 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
- 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
- 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
- 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
- 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
- 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
- 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
- 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
- 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
- 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
- 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
- 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
- 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
- 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
- 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
- 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
- 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
- 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
- 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
- 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
- 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
- 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
- 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
- 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
- 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
- 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
- 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
- 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
- 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
- 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
- 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
- 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
- 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
- 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
- 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
- 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
- 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
- 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
- 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
- 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
- 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
- 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
- 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
- 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
- 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
- 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
- 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
- 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
- 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
- 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
- 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
- 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
- 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
- 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
- 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
- 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
- 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
- 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
- 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
- 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
- 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
- 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
- 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
- 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
- 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
- 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
- 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
- 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
- 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
- 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
- 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
- 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
- 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
- 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
- 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
- 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
- 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
- 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
- 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
- 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
- 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
- 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
- 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
- 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
- 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
- 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
- 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
- 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
- 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
- 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
- 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
- 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
- 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
- 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
- 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
- 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
- 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
- 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
- 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
- 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
- 835584U, // <u,1,2,3>: Cost 0 copy LHS
- 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
- 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
- 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
- 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
- 835584U, // <u,1,2,u>: Cost 0 copy LHS
- 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
- 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
- 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
- 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
- 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
- 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
- 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
- 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
- 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
- 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
- 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
- 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
- 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
- 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
- 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
- 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
- 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
- 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
- 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
- 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
- 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
- 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
- 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
- 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
- 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
- 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
- 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
- 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
- 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
- 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
- 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
- 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
- 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
- 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
- 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
- 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
- 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
- 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
- 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
- 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
- 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
- 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
- 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
- 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
- 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
- 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
- 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
- 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
- 835584U, // <u,1,u,3>: Cost 0 copy LHS
- 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
- 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
- 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
- 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
- 835584U, // <u,1,u,u>: Cost 0 copy LHS
- 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
- 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
- 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
- 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
- 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
- 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
- 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
- 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
- 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
- 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
- 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
- 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
- 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
- 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
- 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
- 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
- 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
- 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
- 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
- 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
- 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
- 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
- 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
- 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
- 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
- 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
- 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
- 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
- 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
- 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
- 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
- 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
- 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
- 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
- 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
- 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
- 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
- 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
- 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
- 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
- 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
- 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
- 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
- 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
- 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
- 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
- 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
- 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
- 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
- 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
- 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
- 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
- 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
- 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
- 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
- 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
- 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
- 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
- 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
- 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
- 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
- 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
- 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
- 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
- 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
- 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
- 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
- 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
- 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
- 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
- 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
- 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
- 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
- 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
- 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
- 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
- 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
- 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
- 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
- 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
- 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
- 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
- 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
- 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
- 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
- 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
- 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
- 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
- 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
- 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
- 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
- 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
- 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
- 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
- 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
- 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
- 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
- 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
- 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
- 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
- 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
- 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
- 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
- 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
- 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
- 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
- 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
- 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
- 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
- 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
- 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
- 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
- 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
- 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
- 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
- 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
- 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
- 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
- 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
- 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
- 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
- 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
- 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
- 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
- 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
- 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
- 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
- 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
- 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
- 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
- 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
- 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
- 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
- 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
- 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
- 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
- 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
- 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
- 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
- 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
- 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
- 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
- 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
- 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
- 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
- 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
- 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
- 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
- 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
- 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
- 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
- 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
- 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
- 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
- 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
- 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
- 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
- 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
- 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
- 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
- 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
- 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
- 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
- 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
- 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
- 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
- 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
- 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
- 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
- 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
- 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
- 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
- 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
- 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
- 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
- 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
- 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
- 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
- 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
- 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
- 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
- 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
- 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
- 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
- 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
- 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
- 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
- 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
- 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
- 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
- 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
- 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
- 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
- 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
- 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
- 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
- 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
- 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
- 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
- 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
- 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
- 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
- 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
- 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
- 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
- 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
- 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
- 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
- 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
- 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
- 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
- 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
- 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
- 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
- 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
- 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
- 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
- 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
- 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
- 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
- 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
- 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
- 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
- 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
- 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
- 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
- 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
- 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
- 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
- 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
- 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
- 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
- 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
- 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
- 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
- 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
- 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
- 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
- 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
- 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
- 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
- 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
- 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
- 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
- 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
- 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
- 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
- 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
- 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
- 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
- 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
- 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
- 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
- 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
- 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
- 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
- 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
- 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
- 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
- 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
- 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
- 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
- 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
- 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
- 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
- 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
- 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
- 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
- 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
- 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
- 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
- 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
- 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
- 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
- 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
- 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
- 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
- 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
- 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
- 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
- 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
- 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
- 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
- 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
- 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
- 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
- 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
- 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
- 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
- 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
- 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
- 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
- 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
- 27705344U, // <u,5,6,7>: Cost 0 copy RHS
- 27705344U, // <u,5,6,u>: Cost 0 copy RHS
- 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
- 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
- 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
- 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
- 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
- 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
- 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
- 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
- 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
- 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
- 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
- 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
- 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
- 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
- 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
- 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
- 27705344U, // <u,5,u,7>: Cost 0 copy RHS
- 27705344U, // <u,5,u,u>: Cost 0 copy RHS
- 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
- 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
- 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
- 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
- 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
- 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
- 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
- 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
- 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
- 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
- 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
- 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
- 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
- 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
- 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
- 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
- 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
- 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
- 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
- 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
- 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
- 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
- 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
- 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
- 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
- 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
- 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
- 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
- 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
- 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
- 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
- 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
- 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
- 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
- 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
- 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
- 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
- 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
- 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
- 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
- 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
- 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
- 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
- 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
- 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
- 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
- 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
- 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
- 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
- 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
- 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
- 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
- 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
- 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
- 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
- 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
- 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
- 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
- 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
- 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
- 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
- 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
- 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
- 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
- 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
- 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
- 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
- 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
- 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
- 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
- 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
- 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
- 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
- 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
- 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
- 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
- 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
- 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
- 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
- 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
- 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
- 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
- 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
- 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
- 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
- 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
- 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
- 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
- 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
- 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
- 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
- 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
- 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
- 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
- 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
- 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
- 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
- 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
- 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
- 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
- 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
- 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
- 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
- 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
- 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
- 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
- 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
- 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
- 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
- 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
- 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
- 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
- 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
- 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
- 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
- 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
- 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
- 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
- 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
- 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
- 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
- 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
- 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
- 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
- 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
- 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
- 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
- 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
- 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
- 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
- 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
- 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
- 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
- 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
- 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
- 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
- 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
- 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
- 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
- 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
- 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
- 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
- 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
- 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
- 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
- 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
- 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
- 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
- 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
- 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
- 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
- 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
- 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
- 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
- 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
- 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
- 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
- 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
- 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
- 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
- 835584U, // <u,u,2,3>: Cost 0 copy LHS
- 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
- 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
- 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
- 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
- 835584U, // <u,u,2,u>: Cost 0 copy LHS
- 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
- 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
- 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
- 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
- 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
- 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
- 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
- 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
- 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
- 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
- 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
- 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
- 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
- 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
- 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
- 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
- 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
- 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
- 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
- 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
- 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
- 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
- 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
- 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
- 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
- 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
- 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
- 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
- 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
- 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
- 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
- 27705344U, // <u,u,6,7>: Cost 0 copy RHS
- 27705344U, // <u,u,6,u>: Cost 0 copy RHS
- 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
- 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
- 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
- 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
- 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
- 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
- 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
- 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
- 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
- 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
- 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
- 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
- 835584U, // <u,u,u,3>: Cost 0 copy LHS
- 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
- 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
- 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
- 27705344U, // <u,u,u,7>: Cost 0 copy RHS
- 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS
+ 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS
+ 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0>
+ 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS
+ 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3>
+ 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3>
+ 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS
+ 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0>
+ 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS
+ 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0>
+ 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5>
+ 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7>
+ 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1>
+ 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1>
+ 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS
+ 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0>
+ 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1>
+ 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0>
+ 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6>
+ 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6>
+ 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7>
+ 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS
+ 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0>
+ 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0>
+ 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3>
+ 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6>
+ 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6>
+ 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7>
+ 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0>
+ 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0>
+ 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1>
+ 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4>
+ 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6>
+ 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5>
+ 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7>
+ 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3>
+ 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7>
+ 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5>
+ 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6>
+ 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5>
+ 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7>
+ 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7>
+ 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7>
+ 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7>
+ 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS
+ 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3>
+ 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7>
+ 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS
+ 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0>
+ 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6>
+ 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0>
+ 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0>
+ 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0>
+ 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7>
+ 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0>
+ 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6>
+ 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0>
+ 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7>
+ 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7>
+ 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS
+ 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS
+ 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS
+ 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u>
+ 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS
+ 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS
+ 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS
+ 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u>
+ 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS
+ 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1>
+ 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1>
+ 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0>
+ 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5>
+ 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7>
+ 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1>
+ 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0>
+ 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS
+ 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1>
+ 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1>
+ 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0>
+ 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3>
+ 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS
+ 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7>
+ 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3>
+ 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1>
+ 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS
+ 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS
+ 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1>
+ 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS
+ 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7>
+ 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7>
+ 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0>
+ 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3>
+ 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0>
+ 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0>
+ 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS
+ 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7>
+ 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0>
+ 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1>
+ 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3>
+ 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS
+ 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1>
+ 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1>
+ 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4>
+ 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS
+ 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4>
+ 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS
+ 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1>
+ 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1>
+ 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0>
+ 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7>
+ 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6>
+ 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1>
+ 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1>
+ 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1>
+ 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7>
+ 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS
+ 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7>
+ 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1>
+ 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7>
+ 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS
+ 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7>
+ 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1>
+ 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1>
+ 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1>
+ 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0>
+ 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1>
+ 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0>
+ 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1>
+ 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6>
+ 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0>
+ 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0>
+ 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7>
+ 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2>
+ 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS
+ 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS
+ 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS
+ 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS
+ 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS
+ 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0>
+ 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS
+ 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0>
+ 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6>
+ 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7>
+ 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7>
+ 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0>
+ 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS
+ 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2>
+ 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1>
+ 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2>
+ 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS
+ 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7>
+ 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7>
+ 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7>
+ 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2>
+ 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2>
+ 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2>
+ 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2>
+ 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS
+ 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3>
+ 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7>
+ 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2>
+ 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS
+ 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2>
+ 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3>
+ 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6>
+ 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3>
+ 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0>
+ 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS
+ 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3>
+ 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4>
+ 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4>
+ 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS
+ 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS
+ 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS
+ 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7>
+ 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3>
+ 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7>
+ 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6>
+ 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6>
+ 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5>
+ 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0>
+ 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1>
+ 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2>
+ 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3>
+ 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7>
+ 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5>
+ 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6>
+ 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6>
+ 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2>
+ 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7>
+ 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2>
+ 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2>
+ 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0>
+ 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6>
+ 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2>
+ 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2>
+ 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7>
+ 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2>
+ 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u>
+ 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS
+ 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS
+ 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3>
+ 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS
+ 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS
+ 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS
+ 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS
+ 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0>
+ 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2>
+ 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0>
+ 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3>
+ 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS
+ 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6>
+ 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7>
+ 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0>
+ 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS
+ 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2>
+ 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1>
+ 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3>
+ 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3>
+ 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6>
+ 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6>
+ 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1>
+ 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3>
+ 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2>
+ 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS
+ 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2>
+ 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2>
+ 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS
+ 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3>
+ 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2>
+ 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS
+ 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2>
+ 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3>
+ 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3>
+ 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3>
+ 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6>
+ 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6>
+ 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7>
+ 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7>
+ 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3>
+ 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2>
+ 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4>
+ 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4>
+ 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3>
+ 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6>
+ 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6>
+ 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS
+ 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4>
+ 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6>
+ 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS
+ 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2>
+ 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2>
+ 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7>
+ 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5>
+ 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7>
+ 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7>
+ 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0>
+ 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5>
+ 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7>
+ 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3>
+ 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7>
+ 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7>
+ 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7>
+ 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6>
+ 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6>
+ 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0>
+ 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0>
+ 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1>
+ 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3>
+ 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7>
+ 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7>
+ 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5>
+ 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7>
+ 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7>
+ 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0>
+ 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3>
+ 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS
+ 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u>
+ 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2>
+ 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3>
+ 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS
+ 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6>
+ 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3>
+ 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0>
+ 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS
+ 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4>
+ 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4>
+ 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0>
+ 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6>
+ 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1>
+ 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0>
+ 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS
+ 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1>
+ 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0>
+ 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1>
+ 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS
+ 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS
+ 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS
+ 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS
+ 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS
+ 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2>
+ 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2>
+ 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4>
+ 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS
+ 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS
+ 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2>
+ 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2>
+ 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4>
+ 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3>
+ 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6>
+ 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6>
+ 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS
+ 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4>
+ 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2>
+ 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4>
+ 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0>
+ 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3>
+ 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4>
+ 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4>
+ 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS
+ 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4>
+ 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS
+ 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS
+ 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0>
+ 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5>
+ 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5>
+ 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS
+ 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6>
+ 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5>
+ 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6>
+ 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2>
+ 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6>
+ 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0>
+ 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6>
+ 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6>
+ 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0>
+ 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4>
+ 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2>
+ 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS
+ 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1>
+ 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4>
+ 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4>
+ 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS
+ 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0>
+ 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2>
+ 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0>
+ 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS
+ 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS
+ 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS
+ 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u>
+ 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS
+ 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS
+ 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS
+ 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u>
+ 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS
+ 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0>
+ 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2>
+ 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5>
+ 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS
+ 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1>
+ 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1>
+ 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS
+ 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS
+ 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3>
+ 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2>
+ 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2>
+ 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1>
+ 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5>
+ 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0>
+ 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3>
+ 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS
+ 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS
+ 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2>
+ 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7>
+ 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2>
+ 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS
+ 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5>
+ 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6>
+ 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS
+ 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2>
+ 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3>
+ 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1>
+ 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3>
+ 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5>
+ 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0>
+ 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7>
+ 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0>
+ 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0>
+ 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1>
+ 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4>
+ 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4>
+ 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5>
+ 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6>
+ 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5>
+ 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6>
+ 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS
+ 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0>
+ 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0>
+ 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5>
+ 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0>
+ 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5>
+ 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5>
+ 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0>
+ 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7>
+ 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7>
+ 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS
+ 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0>
+ 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3>
+ 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4>
+ 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS
+ 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0>
+ 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7>
+ 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0>
+ 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0>
+ 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS
+ 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0>
+ 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7>
+ 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2>
+ 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS
+ 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7>
+ 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0>
+ 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0>
+ 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS
+ 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS
+ 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS
+ 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0>
+ 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u>
+ 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u>
+ 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS
+ 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u>
+ 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0>
+ 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0>
+ 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS
+ 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2>
+ 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4>
+ 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6>
+ 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0>
+ 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0>
+ 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS
+ 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS
+ 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1>
+ 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3>
+ 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3>
+ 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS
+ 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1>
+ 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6>
+ 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS
+ 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2>
+ 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2>
+ 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1>
+ 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2>
+ 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3>
+ 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6>
+ 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS
+ 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS
+ 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2>
+ 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7>
+ 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0>
+ 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3>
+ 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6>
+ 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7>
+ 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0>
+ 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0>
+ 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0>
+ 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS
+ 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2>
+ 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4>
+ 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2>
+ 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
+ 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
+ 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
+ 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
+ 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
+ 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
+ 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
+ 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
+ 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
+ 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
+ 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
+ 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
+ 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
+ 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
+ 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
+ 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
+ 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
+ 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
+ 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
+ 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
+ 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
+ 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
+ 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
+ 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
+ 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
+ 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
+ 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
+ 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
+ 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
+ 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
+ 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
+ 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
+ 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
+ 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
+ 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
+ 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
+ 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
+ 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
+ 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
+ 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
+ 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
+ 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
+ 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
+ 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
+ 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
+ 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
+ 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
+ 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
+ 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
+ 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
+ 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
+ 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
+ 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
+ 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
+ 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
+ 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
+ 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
+ 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
+ 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
+ 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
+ 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
+ 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
+ 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
+ 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
+ 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
+ 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
+ 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
+ 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
+ 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
+ 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
+ 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
+ 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
+ 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
+ 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
+ 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
+ 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
+ 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
+ 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
+ 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
+ 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
+ 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
+ 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
+ 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
+ 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
+ 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
+ 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
+ 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
+ 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
+ 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
+ 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
+ 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
+ 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
+ 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
+ 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
+ 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
+ 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
+ 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
+ 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
+ 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
+ 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
+ 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
+ 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
+ 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
+ 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
+ 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
+ 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
+ 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
+ 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
+ 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
+ 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
+ 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
+ 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
+ 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7>
+ 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
+ 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
+ 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
+ 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
+ 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2>
+ 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
+ 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6>
+ 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
+ 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
+ 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
+ 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
+ 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
+ 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
+ 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
+ 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
+ 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
+ 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7>
+ 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
+ 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
+ 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
+ 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
+ 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
+ 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6>
+ 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
+ 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
+ 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
+ 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
+ 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
+ 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
+ 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
+ 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
+ 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
+ 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
+ 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
+ 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
+ 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
+ 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
+ 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
+ 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
+ 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6>
+ 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
+ 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
+ 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
+ 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7>
+ 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7>
+ 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
+ 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
+ 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
+ 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
+ 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
+ 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6>
+ 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
+ 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7>
+ 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
+ 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
+ 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
+ 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
+ 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
+ 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
+ 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
+ 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
+ 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
+ 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
+ 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6>
+ 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
+ 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
+ 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
+ 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
+ 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
+ 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
+ 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
+ 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
+ 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
+ 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
+ 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
+ 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
+ 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
+ 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
+ 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
+ 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
+ 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
+ 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
+ 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
+ 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
+ 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
+ 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
+ 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
+ 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
+ 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
+ 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
+ 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
+ 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
+ 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
+ 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
+ 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
+ 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
+ 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
+ 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
+ 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
+ 67944550U, // <1,0,3,2>: Cost 1 vrev LHS
+ 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
+ 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
+ 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
+ 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
+ 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
+ 68386972U, // <1,0,3,u>: Cost 1 vrev LHS
+ 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
+ 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
+ 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
+ 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
+ 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
+ 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
+ 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
+ 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
+ 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
+ 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
+ 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
+ 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
+ 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
+ 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
+ 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
+ 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
+ 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
+ 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
+ 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
+ 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
+ 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
+ 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
+ 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
+ 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
+ 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
+ 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
+ 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
+ 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
+ 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
+ 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
+ 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
+ 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
+ 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
+ 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
+ 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
+ 67985515U, // <1,0,u,2>: Cost 1 vrev LHS
+ 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
+ 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
+ 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
+ 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0>
+ 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
+ 68427937U, // <1,0,u,u>: Cost 1 vrev LHS
+ 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
+ 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
+ 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
+ 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
+ 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
+ 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
+ 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
+ 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
+ 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
+ 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
+ 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
+ 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
+ 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
+ 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
+ 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
+ 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
+ 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
+ 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
+ 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
+ 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
+ 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
+ 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
+ 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
+ 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
+ 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
+ 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
+ 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
+ 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
+ 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
+ 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
+ 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
+ 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
+ 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
+ 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
+ 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
+ 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
+ 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
+ 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
+ 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
+ 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
+ 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
+ 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
+ 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
+ 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
+ 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
+ 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
+ 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
+ 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
+ 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
+ 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
+ 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
+ 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
+ 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
+ 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
+ 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
+ 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
+ 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
+ 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
+ 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
+ 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
+ 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
+ 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
+ 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
+ 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
+ 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
+ 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3>
+ 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
+ 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
+ 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7>
+ 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
+ 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
+ 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
+ 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
+ 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
+ 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
+ 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
+ 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
+ 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
+ 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
+ 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
+ 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
+ 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
+ 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
+ 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
+ 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
+ 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
+ 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
+ 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
+ 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
+ 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
+ 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
+ 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
+ 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
+ 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
+ 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
+ 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
+ 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
+ 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
+ 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
+ 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
+ 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2>
+ 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
+ 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
+ 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
+ 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
+ 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
+ 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
+ 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
+ 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
+ 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
+ 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
+ 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
+ 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
+ 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
+ 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
+ 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
+ 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
+ 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
+ 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
+ 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
+ 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
+ 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
+ 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
+ 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
+ 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
+ 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
+ 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
+ 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
+ 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
+ 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
+ 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
+ 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
+ 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
+ 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
+ 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
+ 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
+ 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
+ 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
+ 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
+ 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
+ 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
+ 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
+ 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
+ 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
+ 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
+ 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
+ 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
+ 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
+ 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
+ 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
+ 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
+ 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
+ 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
+ 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
+ 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
+ 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
+ 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
+ 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
+ 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3>
+ 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
+ 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
+ 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
+ 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
+ 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
+ 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
+ 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
+ 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
+ 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
+ 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
+ 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
+ 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
+ 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
+ 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
+ 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
+ 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
+ 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
+ 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
+ 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
+ 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
+ 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
+ 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5>
+ 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
+ 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
+ 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
+ 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
+ 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
+ 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
+ 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
+ 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
+ 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
+ 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
+ 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
+ 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
+ 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
+ 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
+ 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
+ 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
+ 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
+ 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
+ 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
+ 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
+ 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
+ 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
+ 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
+ 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
+ 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
+ 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
+ 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
+ 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
+ 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
+ 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
+ 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
+ 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
+ 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
+ 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
+ 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
+ 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
+ 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
+ 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
+ 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
+ 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
+ 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
+ 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
+ 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
+ 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
+ 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
+ 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
+ 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
+ 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
+ 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
+ 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
+ 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
+ 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
+ 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
+ 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
+ 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
+ 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
+ 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
+ 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
+ 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1>
+ 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
+ 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
+ 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
+ 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
+ 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
+ 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
+ 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
+ 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
+ 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
+ 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
+ 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
+ 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
+ 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
+ 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
+ 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
+ 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
+ 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
+ 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
+ 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
+ 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
+ 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
+ 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
+ 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
+ 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6>
+ 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
+ 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
+ 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
+ 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
+ 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
+ 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
+ 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
+ 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
+ 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
+ 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
+ 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
+ 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
+ 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
+ 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
+ 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
+ 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
+ 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
+ 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
+ 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
+ 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
+ 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
+ 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
+ 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
+ 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
+ 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
+ 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
+ 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
+ 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
+ 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
+ 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
+ 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
+ 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
+ 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
+ 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
+ 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
+ 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
+ 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
+ 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
+ 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
+ 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
+ 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
+ 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
+ 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
+ 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
+ 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
+ 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0>
+ 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
+ 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
+ 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
+ 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
+ 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3>
+ 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
+ 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
+ 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
+ 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6>
+ 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
+ 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
+ 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
+ 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
+ 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
+ 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
+ 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
+ 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
+ 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
+ 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
+ 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2>
+ 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
+ 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3>
+ 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1>
+ 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5>
+ 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
+ 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7>
+ 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
+ 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
+ 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
+ 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
+ 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
+ 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
+ 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
+ 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
+ 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
+ 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
+ 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
+ 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
+ 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
+ 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
+ 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
+ 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
+ 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
+ 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
+ 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
+ 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
+ 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
+ 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
+ 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
+ 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
+ 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
+ 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
+ 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
+ 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
+ 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
+ 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5>
+ 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
+ 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
+ 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
+ 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
+ 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
+ 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
+ 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
+ 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
+ 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
+ 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
+ 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
+ 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
+ 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
+ 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
+ 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
+ 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
+ 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
+ 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
+ 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
+ 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
+ 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
+ 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
+ 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
+ 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
+ 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
+ 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
+ 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
+ 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
+ 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
+ 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
+ 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
+ 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
+ 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
+ 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
+ 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
+ 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
+ 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
+ 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
+ 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
+ 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1>
+ 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
+ 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
+ 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7>
+ 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
+ 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
+ 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
+ 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
+ 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
+ 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
+ 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
+ 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
+ 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
+ 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
+ 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
+ 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
+ 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
+ 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
+ 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
+ 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
+ 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
+ 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
+ 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
+ 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
+ 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
+ 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
+ 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
+ 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
+ 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
+ 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
+ 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
+ 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
+ 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
+ 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
+ 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
+ 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
+ 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
+ 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
+ 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
+ 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
+ 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
+ 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
+ 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
+ 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
+ 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
+ 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
+ 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
+ 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
+ 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
+ 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
+ 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
+ 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
+ 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
+ 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
+ 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
+ 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
+ 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
+ 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
+ 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
+ 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
+ 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
+ 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
+ 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
+ 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
+ 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
+ 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
+ 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
+ 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
+ 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
+ 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
+ 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
+ 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
+ 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
+ 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
+ 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
+ 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
+ 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
+ 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
+ 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
+ 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
+ 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
+ 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
+ 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
+ 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
+ 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
+ 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
+ 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
+ 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
+ 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1>
+ 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2>
+ 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
+ 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
+ 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
+ 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
+ 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
+ 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
+ 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
+ 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
+ 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
+ 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
+ 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
+ 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
+ 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
+ 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
+ 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
+ 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
+ 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
+ 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
+ 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
+ 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
+ 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 115726126U, // <1,u,3,2>: Cost 1 vrev LHS
+ 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
+ 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
+ 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
+ 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
+ 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
+ 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
+ 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4>
+ 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
+ 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
+ 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
+ 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6>
+ 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
+ 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
+ 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
+ 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
+ 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
+ 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
+ 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
+ 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
+ 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
+ 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0>
+ 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
+ 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7>
+ 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
+ 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7>
+ 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
+ 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
+ 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
+ 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
+ 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
+ 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
+ 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
+ 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
+ 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
+ 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7>
+ 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
+ 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
+ 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
+ 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
+ 115767091U, // <1,u,u,2>: Cost 1 vrev LHS
+ 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
+ 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
+ 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
+ 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
+ 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
+ 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
+ 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
+ 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
+ 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
+ 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
+ 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
+ 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
+ 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
+ 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
+ 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
+ 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
+ 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
+ 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
+ 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
+ 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
+ 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
+ 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
+ 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
+ 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
+ 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
+ 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
+ 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
+ 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
+ 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
+ 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
+ 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
+ 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
+ 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
+ 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
+ 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
+ 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
+ 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
+ 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
+ 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
+ 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
+ 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
+ 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
+ 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
+ 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
+ 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
+ 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
+ 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
+ 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
+ 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
+ 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
+ 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
+ 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
+ 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
+ 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
+ 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
+ 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
+ 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
+ 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
+ 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
+ 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
+ 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
+ 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
+ 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
+ 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
+ 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
+ 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
+ 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
+ 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
+ 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
+ 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
+ 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
+ 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
+ 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
+ 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
+ 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
+ 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
+ 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
+ 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
+ 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
+ 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
+ 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
+ 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
+ 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
+ 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
+ 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
+ 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
+ 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
+ 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
+ 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
+ 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
+ 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
+ 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
+ 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
+ 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
+ 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
+ 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
+ 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
+ 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
+ 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
+ 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
+ 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
+ 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
+ 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
+ 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
+ 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
+ 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
+ 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
+ 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
+ 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
+ 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
+ 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
+ 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
+ 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
+ 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
+ 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
+ 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
+ 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
+ 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
+ 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
+ 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
+ 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
+ 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
+ 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
+ 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5>
+ 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
+ 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
+ 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
+ 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
+ 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
+ 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
+ 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
+ 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
+ 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
+ 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
+ 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
+ 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
+ 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
+ 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
+ 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
+ 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
+ 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6>
+ 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
+ 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
+ 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
+ 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2>
+ 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
+ 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
+ 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
+ 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
+ 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
+ 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
+ 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
+ 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1>
+ 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
+ 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
+ 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
+ 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
+ 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
+ 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
+ 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
+ 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
+ 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
+ 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
+ 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
+ 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
+ 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
+ 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
+ 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
+ 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
+ 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
+ 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
+ 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
+ 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
+ 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
+ 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
+ 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
+ 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
+ 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
+ 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
+ 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
+ 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
+ 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
+ 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
+ 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
+ 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
+ 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
+ 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
+ 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
+ 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
+ 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS
+ 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4>
+ 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS
+ 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2>
+ 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3>
+ 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7>
+ 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS
+ 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5>
+ 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5>
+ 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0>
+ 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6>
+ 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3>
+ 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3>
+ 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7>
+ 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS
+ 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7>
+ 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6>
+ 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2>
+ 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7>
+ 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1>
+ 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5>
+ 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2>
+ 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS
+ 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6>
+ 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7>
+ 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7>
+ 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7>
+ 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1>
+ 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS
+ 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS
+ 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS
+ 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS
+ 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS
+ 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS
+ 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6>
+ 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS
+ 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS
+ 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3>
+ 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0>
+ 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS
+ 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7>
+ 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0>
+ 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2>
+ 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5>
+ 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7>
+ 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1>
+ 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3>
+ 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3>
+ 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3>
+ 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7>
+ 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3>
+ 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2>
+ 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS
+ 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4>
+ 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2>
+ 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS
+ 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4>
+ 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS
+ 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4>
+ 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6>
+ 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7>
+ 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6>
+ 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5>
+ 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7>
+ 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1>
+ 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3>
+ 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3>
+ 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1>
+ 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1>
+ 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1>
+ 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4>
+ 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4>
+ 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4>
+ 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2>
+ 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1>
+ 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1>
+ 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2>
+ 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2>
+ 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1>
+ 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4>
+ 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4>
+ 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7>
+ 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3>
+ 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1>
+ 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS
+ 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4>
+ 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3>
+ 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4>
+ 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5>
+ 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4>
+ 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS
+ 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS
+ 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0>
+ 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS
+ 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4>
+ 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1>
+ 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4>
+ 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3>
+ 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4>
+ 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7>
+ 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2>
+ 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1>
+ 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2>
+ 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3>
+ 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4>
+ 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS
+ 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7>
+ 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS
+ 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5>
+ 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2>
+ 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5>
+ 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5>
+ 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS
+ 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5>
+ 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2>
+ 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4>
+ 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2>
+ 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS
+ 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6>
+ 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1>
+ 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS
+ 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2>
+ 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4>
+ 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4>
+ 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6>
+ 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0>
+ 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1>
+ 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7>
+ 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4>
+ 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS
+ 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS
+ 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2>
+ 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2>
+ 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS
+ 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS
+ 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0>
+ 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2>
+ 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5>
+ 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5>
+ 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5>
+ 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7>
+ 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2>
+ 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1>
+ 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0>
+ 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7>
+ 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5>
+ 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7>
+ 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2>
+ 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3>
+ 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5>
+ 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2>
+ 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1>
+ 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2>
+ 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1>
+ 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5>
+ 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1>
+ 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7>
+ 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS
+ 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3>
+ 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5>
+ 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3>
+ 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6>
+ 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3>
+ 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u>
+ 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS
+ 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4>
+ 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5>
+ 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4>
+ 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS
+ 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6>
+ 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS
+ 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7>
+ 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5>
+ 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5>
+ 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS
+ 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5>
+ 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6>
+ 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7>
+ 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7>
+ 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS
+ 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6>
+ 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3>
+ 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6>
+ 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5>
+ 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7>
+ 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6>
+ 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS
+ 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2>
+ 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7>
+ 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2>
+ 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS
+ 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7>
+ 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2>
+ 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS
+ 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS
+ 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS
+ 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u>
+ 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u>
+ 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6>
+ 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS
+ 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7>
+ 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS
+ 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0>
+ 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2>
+ 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0>
+ 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6>
+ 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6>
+ 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1>
+ 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2>
+ 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2>
+ 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1>
+ 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0>
+ 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6>
+ 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7>
+ 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7>
+ 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2>
+ 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1>
+ 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3>
+ 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2>
+ 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1>
+ 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6>
+ 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7>
+ 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6>
+ 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS
+ 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1>
+ 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2>
+ 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1>
+ 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7>
+ 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3>
+ 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6>
+ 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5>
+ 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2>
+ 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3>
+ 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0>
+ 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4>
+ 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4>
+ 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6>
+ 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS
+ 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS
+ 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2>
+ 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3>
+ 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7>
+ 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5>
+ 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5>
+ 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5>
+ 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0>
+ 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS
+ 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS
+ 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1>
+ 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6>
+ 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3>
+ 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS
+ 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5>
+ 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6>
+ 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS
+ 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1>
+ 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2>
+ 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2>
+ 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0>
+ 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5>
+ 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7>
+ 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2>
+ 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0>
+ 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1>
+ 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2>
+ 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS
+ 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3>
+ 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS
+ 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6>
+ 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS
+ 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7>
+ 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS
+ 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS
+ 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2>
+ 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2>
+ 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2>
+ 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0>
+ 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2>
+ 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7>
+ 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7>
+ 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2>
+ 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2>
+ 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1>
+ 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0>
+ 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7>
+ 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5>
+ 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7>
+ 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7>
+ 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0>
+ 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2>
+ 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2>
+ 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7>
+ 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2>
+ 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1>
+ 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6>
+ 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7>
+ 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7>
+ 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1>
+ 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7>
+ 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7>
+ 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2>
+ 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3>
+ 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS
+ 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3>
+ 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3>
+ 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2>
+ 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS
+ 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6>
+ 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4>
+ 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7>
+ 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4>
+ 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4>
+ 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u>
+ 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0>
+ 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS
+ 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2>
+ 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7>
+ 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7>
+ 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5>
+ 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS
+ 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5>
+ 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7>
+ 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6>
+ 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS
+ 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS
+ 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7>
+ 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3>
+ 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6>
+ 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS
+ 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6>
+ 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6>
+ 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0>
+ 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS
+ 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1>
+ 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2>
+ 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7>
+ 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7>
+ 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS
+ 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5>
+ 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7>
+ 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7>
+ 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1>
+ 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2>
+ 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2>
+ 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u>
+ 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS
+ 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS
+ 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u>
+ 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2>
+ 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS
+ 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2>
+ 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7>
+ 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2>
+ 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS
+ 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3>
+ 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3>
+ 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS
+ 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1>
+ 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS
+ 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS
+ 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS
+ 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
+ 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
+ 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
+ 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
+ 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
+ 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
+ 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
+ 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
+ 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS
+ 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4>
+ 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5>
+ 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4>
+ 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS
+ 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS
+ 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS
+ 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5>
+ 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3>
+ 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5>
+ 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7>
+ 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7>
+ 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS
+ 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2>
+ 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3>
+ 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7>
+ 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS
+ 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS
+ 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS
+ 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2>
+ 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u>
+ 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7>
+ 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1>
+ 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6>
+ 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2>
+ 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7>
+ 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2>
+ 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS
+ 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS
+ 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS
+ 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS
+ 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS
+ 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS
+ 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS
+ 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS
+ 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS
+ 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1>
+ 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1>
+ 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7>
+ 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1>
+ 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0>
+ 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2>
+ 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS
+ 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0>
+ 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3>
+ 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS
+ 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7>
+ 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7>
+ 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1>
+ 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0>
+ 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7>
+ 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7>
+ 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7>
+ 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2>
+ 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1>
+ 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3>
+ 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6>
+ 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7>
+ 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7>
+ 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1>
+ 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3>
+ 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4>
+ 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1>
+ 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6>
+ 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6>
+ 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4>
+ 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7>
+ 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3>
+ 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6>
+ 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6>
+ 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5>
+ 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7>
+ 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7>
+ 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS
+ 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7>
+ 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7>
+ 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7>
+ 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2>
+ 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1>
+ 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0>
+ 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6>
+ 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0>
+ 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7>
+ 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2>
+ 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7>
+ 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0>
+ 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7>
+ 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6>
+ 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7>
+ 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3>
+ 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7>
+ 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0>
+ 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2>
+ 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS
+ 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7>
+ 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u>
+ 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS
+ 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS
+ 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS
+ 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1>
+ 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS
+ 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1>
+ 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6>
+ 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0>
+ 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1>
+ 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1>
+ 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1>
+ 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5>
+ 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5>
+ 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3>
+ 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3>
+ 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1>
+ 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2>
+ 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0>
+ 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS
+ 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7>
+ 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0>
+ 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0>
+ 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS
+ 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1>
+ 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS
+ 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3>
+ 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS
+ 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5>
+ 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5>
+ 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5>
+ 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS
+ 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS
+ 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS
+ 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4>
+ 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5>
+ 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1>
+ 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7>
+ 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5>
+ 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5>
+ 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7>
+ 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0>
+ 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1>
+ 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7>
+ 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5>
+ 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7>
+ 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0>
+ 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7>
+ 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS
+ 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1>
+ 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2>
+ 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS
+ 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7>
+ 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7>
+ 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7>
+ 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS
+ 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3>
+ 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0>
+ 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS
+ 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS
+ 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3>
+ 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0>
+ 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0>
+ 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5>
+ 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7>
+ 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0>
+ 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2>
+ 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1>
+ 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0>
+ 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS
+ 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7>
+ 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1>
+ 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1>
+ 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1>
+ 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3>
+ 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5>
+ 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6>
+ 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3>
+ 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3>
+ 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1>
+ 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0>
+ 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2>
+ 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3>
+ 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5>
+ 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1>
+ 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2>
+ 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0>
+ 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1>
+ 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS
+ 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4>
+ 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4>
+ 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS
+ 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0>
+ 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4>
+ 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS
+ 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5>
+ 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5>
+ 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5>
+ 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7>
+ 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5>
+ 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1>
+ 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3>
+ 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6>
+ 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5>
+ 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1>
+ 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2>
+ 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3>
+ 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2>
+ 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS
+ 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6>
+ 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7>
+ 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7>
+ 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7>
+ 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS
+ 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1>
+ 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS
+ 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2>
+ 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5>
+ 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS
+ 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0>
+ 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1>
+ 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1>
+ 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0>
+ 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2>
+ 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0>
+ 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2>
+ 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1>
+ 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2>
+ 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2>
+ 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7>
+ 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2>
+ 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3>
+ 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3>
+ 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3>
+ 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1>
+ 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS
+ 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3>
+ 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3>
+ 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3>
+ 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3>
+ 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS
+ 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3>
+ 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3>
+ 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0>
+ 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS
+ 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4>
+ 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3>
+ 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3>
+ 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3>
+ 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3>
+ 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3>
+ 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5>
+ 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7>
+ 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3>
+ 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS
+ 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS
+ 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4>
+ 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4>
+ 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6>
+ 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4>
+ 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6>
+ 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS
+ 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7>
+ 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6>
+ 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS
+ 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5>
+ 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5>
+ 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5>
+ 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS
+ 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5>
+ 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0>
+ 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7>
+ 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7>
+ 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7>
+ 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7>
+ 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7>
+ 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7>
+ 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6>
+ 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3>
+ 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3>
+ 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS
+ 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7>
+ 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7>
+ 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3>
+ 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS
+ 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7>
+ 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3>
+ 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7>
+ 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS
+ 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS
+ 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2>
+ 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, <u,2,3,3>
+ 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS
+ 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS
+ 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6>
+ 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3>
+ 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS
+ 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS
+ 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0>
+ 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2>
+ 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4>
+ 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5>
+ 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1>
+ 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2>
+ 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0>
+ 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS
+ 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2>
+ 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1>
+ 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4>
+ 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3>
+ 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS
+ 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0>
+ 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4>
+ 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4>
+ 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS
+ 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3>
+ 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2>
+ 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1>
+ 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3>
+ 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3>
+ 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0>
+ 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4>
+ 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0>
+ 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2>
+ 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4>
+ 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4>
+ 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3>
+ 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1>
+ 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS
+ 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS
+ 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1>
+ 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2>
+ 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS
+ 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4>
+ 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4>
+ 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6>
+ 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4>
+ 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS
+ 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS
+ 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5>
+ 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2>
+ 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS
+ 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7>
+ 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5>
+ 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS
+ 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1>
+ 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6>
+ 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3>
+ 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6>
+ 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7>
+ 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4>
+ 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2>
+ 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2>
+ 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5>
+ 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4>
+ 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7>
+ 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6>
+ 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0>
+ 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0>
+ 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7>
+ 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4>
+ 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS
+ 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS
+ 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u>
+ 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1>
+ 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS
+ 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS
+ 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u>
+ 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS
+ 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0>
+ 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5>
+ 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4>
+ 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1>
+ 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1>
+ 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0>
+ 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS
+ 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS
+ 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1>
+ 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5>
+ 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5>
+ 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5>
+ 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3>
+ 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7>
+ 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3>
+ 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3>
+ 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3>
+ 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5>
+ 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2>
+ 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5>
+ 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5>
+ 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3>
+ 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7>
+ 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3>
+ 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5>
+ 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2>
+ 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5>
+ 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4>
+ 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3>
+ 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6>
+ 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5>
+ 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6>
+ 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS
+ 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS
+ 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
+ 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
+ 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
+ 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS
+ 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
+ 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
+ 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
+ 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
+ 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
+ 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
+ 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
+ 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
+ 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
+ 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
+ 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
+ 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
+ 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
+ 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
+ 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
+ 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
+ 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
+ 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
+ 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
+ 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
+ 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
+ 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
+ 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
+ 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
+ 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
+ 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
+ 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
+ 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
+ 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
+ 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
+ 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
+ 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
+ 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
+ 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
+ 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
+ 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
+ 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
+ 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
+ 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
+ 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
+ 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
+ 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
+ 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
+ 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
+ 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
+ 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
+ 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
+ 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
+ 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
+ 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
+ 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
+ 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
+ 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
+ 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
+ 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
+ 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
+ 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
+ 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
+ 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
+ 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
+ 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
+ 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
+ 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
+ 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
+ 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
+ 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
+ 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
+ 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
+ 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
+ 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
+ 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
+ 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
+ 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
+ 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
+ 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
+ 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
+ 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
+ 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
+ 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
+ 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
+ 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
+ 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
+ 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
+ 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
+ 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
+ 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
+ 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
+ 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
+ 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
+ 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
+ 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
+ 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
+ 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
+ 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
+ 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
+ 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
+ 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
+ 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
+ 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
+ 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
+ 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
+ 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
+ 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
+ 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
+ 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
+ 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
+ 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
+ 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
+ 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
+ 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
+ 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
+ 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
+ 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
+ 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
+ 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
+ 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
+ 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
+ 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
+ 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
+ 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
+ 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
+ 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
+ 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
+ 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
+ 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
+ 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
+ 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
+ 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
+ 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
+ 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
+ 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
+ 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
+ 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
+ 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
+ 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
+ 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
+ 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
+ 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
+ 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
+ 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
+ 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
+ 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
+ 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
+ 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
+ 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
+ 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
+ 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
+ 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
+ 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
+ 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
+ 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
+ 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
+ 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
+ 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
+ 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
+ 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
+ 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
+ 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
+ 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
+ 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
+ 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
+ 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
+ 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
+ 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
+ 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
+ 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
+ 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
+ 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
+ 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
+ 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
+ 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
+ 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
+ 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
+ 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
+ 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
+ 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
+ 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
+ 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
+ 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
+ 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2>
+ 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
+ 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0>
+ 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
+ 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6>
+ 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
+ 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7>
+ 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
+ 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
+ 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2>
+ 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2>
+ 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1>
+ 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1>
+ 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2>
+ 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
+ 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2>
+ 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
+ 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
+ 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3>
+ 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
+ 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
+ 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
+ 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3>
+ 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
+ 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
+ 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0>
+ 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7>
+ 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
+ 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, <u,2,7,3>
+ 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0>
+ 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1>
+ 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
+ 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2>
+ 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
+ 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5>
+ 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7>
+ 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
+ 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
+ 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
+ 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5>
+ 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5>
+ 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
+ 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6>
+ 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6>
+ 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6>
+ 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6>
+ 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
+ 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7>
+ 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
+ 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
+ 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
+ 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7>
+ 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
+ 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1>
+ 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
+ 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7>
+ 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
+ 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7>
+ 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
+ 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0>
+ 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7>
+ 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
+ 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
+ 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
+ 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
+ 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
+ 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2>
+ 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
+ 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
+ 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1>
+ 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2>
+ 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
+ 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
+ 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5>
+ 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6>
+ 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
+ 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0>
+ 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
+ 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
+ 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
+ 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
+ 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
+ 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
+ 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
+ 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
+ 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
+ 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
+ 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
+ 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
+ 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
+ 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
+ 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
+ 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
+ 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
+ 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
+ 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
+ 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
+ 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
+ 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
+ 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
+ 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
+ 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
+ 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
+ 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
+ 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
+ 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
+ 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
+ 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
+ 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
+ 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
+ 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
+ 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
+ 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
+ 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
+ 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
+ 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
+ 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
+ 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
+ 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
+ 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
+ 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
+ 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
+ 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
+ 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
+ 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
+ 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
+ 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
+ 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
+ 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
+ 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
+ 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
+ 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
+ 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
+ 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
+ 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
+ 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
+ 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
+ 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
+ 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
+ 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
+ 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
+ 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
+ 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
+ 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
+ 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
+ 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
+ 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
+ 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
+ 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
+ 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
+ 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
+ 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
+ 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
+ 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
+ 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
+ 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
+ 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
+ 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
+ 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
+ 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
+ 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
+ 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
+ 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
+ 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
+ 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
+ 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
+ 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
+ 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
+ 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
+ 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
+ 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
+ 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
+ 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
+ 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
+ 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
+ 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
+ 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
+ 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
+ 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
+ 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
+ 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
+ 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
+ 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
+ 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
+ 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
+ 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
+ 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
+ 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
+ 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
+ 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
+ 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
+ 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
+ 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
+ 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3>
+ 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
+ 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
+ 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
+ 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
+ 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
+ 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
+ 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
+ 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
+ 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
+ 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
+ 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
+ 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
+ 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
+ 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
+ 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
+ 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
+ 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
+ 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
+ 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
+ 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
+ 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
+ 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2>
+ 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
+ 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
+ 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
+ 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
+ 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
+ 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
+ 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
+ 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
+ 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
+ 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
+ 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
+ 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
+ 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
+ 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
+ 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
+ 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
+ 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
+ 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
+ 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
+ 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
+ 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
+ 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
+ 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
+ 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
+ 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
+ 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
+ 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
+ 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
+ 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
+ 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
+ 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
+ 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
+ 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
+ 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
+ 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
+ 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
+ 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
+ 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
+ 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
+ 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
+ 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
+ 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
+ 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
+ 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
+ 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
+ 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
+ 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
+ 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
+ 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
+ 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
+ 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
+ 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
+ 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
+ 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
+ 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
+ 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
+ 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
+ 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3>
+ 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2>
+ 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
+ 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
+ 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
+ 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
+ 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
+ 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
+ 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
+ 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
+ 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
+ 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
+ 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
+ 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
+ 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
+ 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
+ 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
+ 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
+ 2600784890U, // <4,2,u,7>: Cost 3 vext1 <u,4,2,u>, <7,0,1,2>
+ 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
+ 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
+ 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
+ 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
+ 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
+ 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
+ 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
+ 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
+ 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
+ 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
+ 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
+ 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
+ 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
+ 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
+ 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
+ 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
+ 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
+ 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
+ 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
+ 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
+ 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
+ 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
+ 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
+ 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
+ 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
+ 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
+ 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
+ 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
+ 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
+ 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
+ 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
+ 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
+ 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
+ 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
+ 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
+ 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
+ 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
+ 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
+ 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
+ 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
+ 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
+ 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
+ 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
+ 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
+ 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
+ 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
+ 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
+ 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
+ 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
+ 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
+ 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
+ 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
+ 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
+ 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
+ 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
+ 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
+ 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
+ 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
+ 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
+ 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
+ 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
+ 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
+ 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
+ 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
+ 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
+ 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
+ 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
+ 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
+ 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
+ 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
+ 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
+ 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
+ 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
+ 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
+ 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
+ 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
+ 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
+ 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
+ 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
+ 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
+ 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
+ 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
+ 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
+ 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
+ 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
+ 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
+ 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
+ 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
+ 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
+ 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
+ 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
+ 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
+ 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
+ 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
+ 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
+ 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
+ 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
+ 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
+ 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
+ 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
+ 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
+ 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
+ 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
+ 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
+ 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
+ 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
+ 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
+ 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
+ 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
+ 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
+ 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
+ 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
+ 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
+ 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
+ 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
+ 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
+ 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
+ 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
+ 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
+ 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
+ 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
+ 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
+ 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
+ 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
+ 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
+ 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
+ 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
+ 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
+ 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
+ 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
+ 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
+ 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
+ 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
+ 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
+ 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
+ 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
+ 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
+ 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
+ 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
+ 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
+ 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
+ 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
+ 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
+ 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
+ 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
+ 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
+ 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
+ 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
+ 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
+ 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
+ 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
+ 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
+ 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
+ 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
+ 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
+ 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
+ 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
+ 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
+ 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
+ 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
+ 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
+ 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
+ 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
+ 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
+ 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
+ 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
+ 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
+ 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
+ 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
+ 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
+ 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
+ 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
+ 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
+ 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
+ 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
+ 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
+ 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
+ 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
+ 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
+ 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
+ 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
+ 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
+ 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
+ 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
+ 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
+ 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
+ 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
+ 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
+ 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
+ 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
+ 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
+ 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
+ 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
+ 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
+ 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
+ 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
+ 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
+ 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
+ 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
+ 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
+ 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
+ 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
+ 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
+ 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
+ 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
+ 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
+ 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
+ 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
+ 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
+ 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
+ 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
+ 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
+ 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
+ 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
+ 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
+ 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
+ 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
+ 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0>
+ 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
+ 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
+ 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
+ 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
+ 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
+ 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
+ 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
+ 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
+ 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
+ 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
+ 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
+ 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
+ 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
+ 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
+ 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
+ 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
+ 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
+ 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
+ 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
+ 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
+ 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
+ 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
+ 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
+ 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
+ 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
+ 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
+ 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
+ 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
+ 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
+ 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
+ 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
+ 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
+ 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
+ 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
+ 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
+ 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
+ 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
+ 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
+ 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
+ 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
+ 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
+ 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
+ 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
+ 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
+ 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
+ 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
+ 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
+ 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
+ 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
+ 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
+ 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
+ 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
+ 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
+ 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
+ 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
+ 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
+ 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
+ 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
+ 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
+ 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
+ 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
+ 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
+ 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
+ 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
+ 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
+ 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
+ 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
+ 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
+ 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
+ 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
+ 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
+ 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1>
+ 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
+ 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
+ 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
+ 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
+ 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
+ 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
+ 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
+ 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
+ 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
+ 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
+ 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
+ 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
+ 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
+ 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
+ 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
+ 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
+ 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
+ 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
+ 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
+ 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
+ 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
+ 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
+ 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
+ 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
+ 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
+ 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
+ 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
+ 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
+ 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
+ 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
+ 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
+ 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
+ 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
+ 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
+ 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
+ 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
+ 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
+ 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
+ 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
+ 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
+ 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
+ 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
+ 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
+ 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
+ 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
+ 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
+ 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
+ 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
+ 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
+ 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
+ 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
+ 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
+ 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
+ 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
+ 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
+ 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
+ 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
+ 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
+ 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
+ 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
+ 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
+ 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
+ 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
+ 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
+ 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
+ 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
+ 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
+ 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
+ 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
+ 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
+ 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
+ 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
+ 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
+ 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
+ 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
+ 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
+ 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
+ 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
+ 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
+ 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
+ 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
+ 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
+ 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2>
+ 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
+ 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0>
+ 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
+ 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
+ 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
+ 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
+ 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
+ 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
+ 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3>
+ 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
+ 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
+ 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3>
+ 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
+ 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
+ 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
+ 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
+ 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
+ 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
+ 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4>
+ 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
+ 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3>
+ 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7>
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2>
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6>
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
+ 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
+ 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
+ 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
+ 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
+ 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
+ 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
+ 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
+ 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
+ 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
+ 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
+ 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
+ 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
+ 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
+ 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
+ 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
+ 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
+ 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
+ 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
+ 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
+ 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
+ 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
+ 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
+ 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
+ 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
+ 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
+ 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
+ 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
+ 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
+ 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
+ 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
+ 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
+ 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
+ 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
+ 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
+ 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
+ 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
+ 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
+ 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
+ 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
+ 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
+ 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
+ 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
+ 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
+ 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
+ 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
+ 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
+ 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
+ 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
+ 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
+ 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
+ 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
+ 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
+ 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
+ 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
+ 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
+ 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
+ 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
+ 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
+ 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
+ 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
+ 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
+ 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
+ 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
+ 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
+ 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
+ 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
+ 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
+ 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
+ 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
+ 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
+ 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
+ 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
+ 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
+ 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
+ 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
+ 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
+ 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
+ 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
+ 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
+ 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
+ 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
+ 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
+ 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
+ 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
+ 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
+ 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
+ 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
+ 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
+ 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
+ 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
+ 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
+ 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
+ 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
+ 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
+ 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
+ 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
+ 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
+ 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
+ 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1>
+ 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
+ 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
+ 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2>
+ 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
+ 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
+ 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0>
+ 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
+ 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
+ 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
+ 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
+ 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
+ 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3>
+ 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
+ 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
+ 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
+ 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
+ 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
+ 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3>
+ 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
+ 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
+ 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
+ 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7>
+ 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5>
+ 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
+ 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
+ 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6>
+ 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6>
+ 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
+ 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6>
+ 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
+ 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
+ 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
+ 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7>
+ 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
+ 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7>
+ 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
+ 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
+ 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
+ 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
+ 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
+ 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
+ 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
+ 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
+ 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
+ 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
+ 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
+ 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
+ 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
+ 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
+ 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
+ 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
+ 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
+ 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
+ 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
+ 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
+ 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
+ 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
+ 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
+ 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
+ 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
+ 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
+ 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
+ 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
+ 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
+ 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
+ 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
+ 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
+ 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
+ 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
+ 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
+ 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
+ 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
+ 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
+ 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
+ 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
+ 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
+ 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
+ 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
+ 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
+ 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
+ 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
+ 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
+ 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
+ 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
+ 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
+ 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
+ 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
+ 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
+ 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
+ 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
+ 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
+ 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
+ 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
+ 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
+ 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
+ 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
+ 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
+ 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
+ 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
+ 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
+ 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
+ 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
+ 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
+ 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
+ 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
+ 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
+ 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
+ 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
+ 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
+ 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
+ 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
+ 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
+ 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
+ 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
+ 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
+ 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
+ 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
+ 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
+ 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
+ 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
+ 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
+ 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
+ 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
+ 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
+ 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
+ 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
+ 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
+ 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
+ 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
+ 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
+ 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
+ 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
+ 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
+ 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
+ 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
+ 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
+ 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
+ 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
+ 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
+ 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
+ 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
+ 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
+ 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
+ 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
+ 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
+ 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
+ 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
+ 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
+ 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
+ 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
+ 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
+ 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
+ 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
+ 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
+ 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
+ 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
+ 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
+ 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
+ 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
+ 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
+ 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
+ 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
+ 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
+ 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
+ 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
+ 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
+ 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
+ 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
+ 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
+ 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
+ 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
+ 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
+ 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
+ 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
+ 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
+ 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
+ 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
+ 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
+ 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
+ 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
+ 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
+ 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
+ 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
+ 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
+ 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
+ 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
+ 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
+ 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
+ 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
+ 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
+ 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
+ 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
+ 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
+ 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
+ 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
+ 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
+ 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
+ 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
+ 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
+ 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
+ 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
+ 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
+ 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
+ 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
+ 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
+ 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
+ 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
+ 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
+ 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
+ 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
+ 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
+ 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
+ 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
+ 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
+ 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
+ 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
+ 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
+ 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
+ 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
+ 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
+ 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
+ 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
+ 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
+ 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
+ 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
+ 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
+ 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
+ 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
+ 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
+ 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
+ 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
+ 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
+ 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
+ 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
+ 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
+ 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
+ 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
+ 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
+ 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
+ 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
+ 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
+ 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
+ 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
+ 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
+ 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
+ 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
+ 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
+ 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
+ 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
+ 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
+ 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
+ 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
+ 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
+ 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
+ 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
+ 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
+ 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
+ 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
+ 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
+ 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
+ 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
+ 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
+ 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
+ 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
+ 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
+ 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
+ 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
+ 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
+ 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
+ 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
+ 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
+ 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
+ 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
+ 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
+ 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
+ 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
+ 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
+ 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
+ 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
+ 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
+ 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
+ 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
+ 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
+ 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
+ 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
+ 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
+ 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
+ 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
+ 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
+ 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
+ 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
+ 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
+ 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
+ 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
+ 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
+ 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
+ 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
+ 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
+ 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
+ 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
+ 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
+ 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
+ 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
+ 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
+ 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
+ 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
+ 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
+ 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
+ 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
+ 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
+ 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
+ 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
+ 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
+ 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
+ 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
+ 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
+ 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
+ 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
+ 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
+ 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
+ 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
+ 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
+ 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
+ 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
+ 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS
+ 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
+ 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
+ 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
+ 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
+ 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
+ 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
+ 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
+ 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
+ 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
+ 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
+ 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
+ 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
+ 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
+ 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
+ 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
+ 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
+ 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
+ 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
+ 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
+ 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
+ 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
+ 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
+ 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
+ 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
+ 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
+ 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
+ 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
+ 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
+ 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3>
+ 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
+ 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
+ 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
+ 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
+ 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
+ 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
+ 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
+ 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
+ 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
+ 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
+ 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
+ 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
+ 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
+ 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
+ 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
+ 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
+ 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
+ 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
+ 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
+ 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
+ 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
+ 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
+ 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
+ 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
+ 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
+ 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
+ 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
+ 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
+ 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
+ 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
+ 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
+ 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
+ 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
+ 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
+ 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
+ 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
+ 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
+ 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
+ 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
+ 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
+ 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
+ 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
+ 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
+ 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
+ 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
+ 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
+ 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
+ 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
+ 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
+ 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
+ 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
+ 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
+ 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
+ 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
+ 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
+ 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
+ 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
+ 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
+ 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
+ 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
+ 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
+ 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
+ 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
+ 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
+ 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
+ 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
+ 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
+ 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
+ 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
+ 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
+ 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
+ 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
+ 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
+ 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
+ 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
+ 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
+ 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
+ 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
+ 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
+ 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
+ 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
+ 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
+ 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
+ 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
+ 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
+ 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
+ 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
+ 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
+ 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
+ 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
+ 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
+ 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
+ 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
+ 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
+ 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
+ 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
+ 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
+ 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
+ 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
+ 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
+ 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
+ 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
+ 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
+ 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
+ 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
+ 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
+ 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
+ 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
+ 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
+ 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
+ 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
+ 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
+ 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
+ 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
+ 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
+ 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
+ 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
+ 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
+ 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
+ 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
+ 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
+ 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
+ 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
+ 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
+ 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
+ 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
+ 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
+ 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
+ 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
+ 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
+ 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
+ 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
+ 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
+ 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
+ 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
+ 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
+ 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
+ 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
+ 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
+ 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
+ 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
+ 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
+ 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
+ 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
+ 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
+ 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
+ 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
+ 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
+ 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
+ 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
+ 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
+ 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
+ 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
+ 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
+ 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
+ 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
+ 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
+ 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
+ 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
+ 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
+ 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
+ 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
+ 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
+ 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
+ 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
+ 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
+ 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
+ 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
+ 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
+ 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
+ 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
+ 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
+ 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
+ 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
+ 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
+ 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
+ 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
+ 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
+ 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
+ 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
+ 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
+ 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
+ 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
+ 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
+ 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
+ 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
+ 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
+ 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
+ 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
+ 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
+ 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
+ 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
+ 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
+ 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
+ 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
+ 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
+ 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
+ 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
+ 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
+ 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
+ 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
+ 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
+ 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
+ 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
+ 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
+ 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
+ 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
+ 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
+ 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
+ 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
+ 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
+ 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
+ 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
+ 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
+ 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
+ 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
+ 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
+ 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
+ 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
+ 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0>
+ 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2>
+ 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0>
+ 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0>
+ 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1>
+ 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0>
+ 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0>
+ 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2>
+ 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2>
+ 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3>
+ 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1>
+ 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3>
+ 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5>
+ 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS
+ 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3>
+ 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1>
+ 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1>
+ 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3>
+ 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5>
+ 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0>
+ 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2>
+ 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0>
+ 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5>
+ 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3>
+ 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3>
+ 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3>
+ 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3>
+ 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1>
+ 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3>
+ 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6>
+ 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7>
+ 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5>
+ 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3>
+ 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7>
+ 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7>
+ 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7>
+ 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5>
+ 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7>
+ 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3>
+ 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4>
+ 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4>
+ 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6>
+ 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4>
+ 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6>
+ 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6>
+ 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS
+ 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7>
+ 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3>
+ 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7>
+ 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7>
+ 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7>
+ 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7>
+ 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5>
+ 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7>
+ 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS
+ 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0>
+ 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7>
+ 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7>
+ 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS
+ 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7>
+ 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7>
+ 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0>
+ 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7>
+ 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2>
+ 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2>
+ 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2>
+ 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7>
+ 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7>
+ 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS
+ 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS
+ 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2>
+ 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3>
+ 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0>
+ 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS
+ 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6>
+ 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7>
+ 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS
+ 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
+ 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2>
+ 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2>
+ 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2>
+ 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1>
+ 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1>
+ 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS
+ 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2>
+ 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS
+ 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
+ 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS
+ 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3>
+ 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS
+ 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3>
+ 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1>
+ 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS
+ 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
+ 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0>
+ 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
+ 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0>
+ 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
+ 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4>
+ 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3>
+ 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0>
+ 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1>
+ 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
+ 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6>
+ 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
+ 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5>
+ 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7>
+ 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7>
+ 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0>
+ 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1>
+ 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1>
+ 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5>
+ 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6>
+ 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5>
+ 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
+ 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6>
+ 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6>
+ 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6>
+ 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS
+ 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u>
+ 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3>
+ 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7>
+ 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u>
+ 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u>
+ 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS
+ 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7>
+ 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS
+ 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1>
+ 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7>
+ 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u>
+ 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7>
+ 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
+ 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7>
+ 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u>
+ 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0>
+ 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7>
+ 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1>
+ 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3>
+ 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6>
+ 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7>
+ 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5>
+ 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7>
+ 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2>
+ 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS
+ 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1>
+ 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2>
+ 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS
+ 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0>
+ 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5>
+ 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6>
+ 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS
+ 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS
+ 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS
+ 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS
+ 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1>
+ 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2>
+ 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0>
+ 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS
+ 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3>
+ 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0>
+ 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0>
+ 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS
+ 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS
+ 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS
+ 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS
+ 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3>
+ 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS
+ 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7>
+ 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1>
+ 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2>
+ 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS
+ 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
+ 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1>
+ 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS
+ 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1>
+ 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
+ 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
+ 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7>
+ 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2>
+ 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2>
+ 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
+ 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0>
+ 72589981U, // <u,0,3,2>: Cost 1 vrev LHS
+ 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3>
+ 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6>
+ 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0>
+ 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0>
+ 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3>
+ 73032403U, // <u,0,3,u>: Cost 1 vrev LHS
+ 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u>
+ 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5>
+ 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6>
+ 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2>
+ 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS
+ 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6>
+ 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6>
+ 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS
+ 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS
+ 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
+ 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0>
+ 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5>
+ 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5>
+ 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0>
+ 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0>
+ 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS
+ 2712068598U, // <u,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
+ 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS
+ 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0>
+ 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS
+ 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u>
+ 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6>
+ 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1>
+ 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS
+ 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS
+ 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0>
+ 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7>
+ 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7>
+ 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS
+ 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u>
+ 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7>
+ 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7>
+ 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS
+ 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS
+ 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1>
+ 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS
+ 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1>
+ 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6>
+ 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS
+ 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u>
+ 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS
+ 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS
+ 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1>
+ 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS
+ 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1>
+ 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2>
+ 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS
+ 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
+ 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0>
+ 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0>
+ 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2>
+ 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS
+ 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS
+ 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2>
+ 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3>
+ 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS
+ 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5>
+ 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1>
+ 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
+ 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS
+ 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS
+ 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3>
+ 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS
+ 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3>
+ 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7>
+ 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS
+ 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3>
+ 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0>
+ 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS
+ 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS
+ 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7>
+ 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7>
+ 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1>
+ 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3>
+ 1567992749U, // <u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1>
+ 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u>
+ 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u>
+ 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4>
+ 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS
+ 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS
+ 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1>
+ 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4>
+ 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4>
+ 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS
+ 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2>
+ 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2>
+ 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7>
+ 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS
+ 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1>
+ 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1>
+ 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS
+ 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7>
+ 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS
+ 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7>
+ 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
+ 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7>
+ 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS
+ 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7>
+ 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1>
+ 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1>
+ 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1>
+ 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS
+ 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7>
+ 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1>
+ 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS
+ 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7>
+ 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u>
+ 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7>
+ 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS
+ 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS
+ 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS
+ 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7>
+ 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7>
+ 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2>
+ 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2>
+ 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0>
+ 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5>
+ 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7>
+ 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4>
+ 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u>
+ 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS
+ 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2>
+ 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1>
+ 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0>
+ 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1>
+ 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS
+ 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7>
+ 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3>
+ 2602718850U, // <u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2>
+ 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1>
+ 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS
+ 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2>
+ 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS
+ 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3>
+ 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS
+ 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7>
+ 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7>
+ 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
+ 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS
+ 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS
+ 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
+ 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS
+ 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS
+ 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
+ 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS
+ 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4>
+ 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5>
+ 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5>
+ 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS
+ 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2>
+ 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4>
+ 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS
+ 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS
+ 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0>
+ 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7>
+ 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6>
+ 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5>
+ 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5>
+ 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7>
+ 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS
+ 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5>
+ 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS
+ 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2>
+ 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2>
+ 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7>
+ 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS
+ 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7>
+ 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7>
+ 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1>
+ 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7>
+ 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2>
+ 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2>
+ 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7>
+ 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS
+ 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6>
+ 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7>
+ 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS
+ 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS
+ 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS
+ 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS
+ 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3>
+ 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS
+ 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS
+ 1726339226U, // <u,2,u,6>: Cost 2 vuzpl LHS, RHS
+ 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
+ 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS
+ 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0>
+ 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS
+ 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0>
+ 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0>
+ 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7>
+ 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0>
+ 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS
+ 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1>
+ 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0>
+ 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5>
+ 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7>
+ 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u>
+ 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3>
+ 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS
+ 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2>
+ 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2>
+ 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1>
+ 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS
+ 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u>
+ 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1>
+ 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
+ 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3>
+ 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3>
+ 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS
+ 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
+ 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
+ 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3>
+ 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7>
+ 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS
+ 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS
+ 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4>
+ 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0>
+ 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2>
+ 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS
+ 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS
+ 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6>
+ 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4>
+ 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS
+ 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS
+ 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3>
+ 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5>
+ 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6>
+ 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6>
+ 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5>
+ 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0>
+ 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS
+ 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS
+ 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2>
+ 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6>
+ 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3>
+ 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u>
+ 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6>
+ 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6>
+ 1592562488U, // <u,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6>
+ 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1>
+ 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6>
+ 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7>
+ 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7>
+ 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2>
+ 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS
+ 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u>
+ 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u>
+ 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7>
+ 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS
+ 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2>
+ 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS
+ 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0>
+ 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS
+ 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6>
+ 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS
+ 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7>
+ 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS
+ 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS
+ 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0>
+ 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4>
+ 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
+ 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5>
+ 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
+ 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
+ 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
+ 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS
+ 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS
+ 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1>
+ 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4>
+ 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3>
+ 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS
+ 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS
+ 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3>
+ 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1>
+ 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4>
+ 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS
+ 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u>
+ 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2>
+ 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5>
+ 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS
+ 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
+ 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS
+ 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2>
+ 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS
+ 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2>
+ 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4>
+ 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3>
+ 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3>
+ 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6>
+ 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5>
+ 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6>
+ 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7>
+ 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u>
+ 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS
+ 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4>
+ 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4>
+ 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4>
+ 161926454U, // <u,4,4,4>: Cost 1 vdup0 RHS
+ 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
+ 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
+ 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS
+ 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS
+ 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3>
+ 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5>
+ 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2>
+ 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS
+ 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS
+ 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS
+ 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS
+ 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2>
+ 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2>
+ 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2>
+ 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS
+ 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7>
+ 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS
+ 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
+ 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS
+ 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS
+ 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4>
+ 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7>
+ 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7>
+ 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS
+ 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
+ 96808489U, // <u,4,7,6>: Cost 1 vrev RHS
+ 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7>
+ 96955963U, // <u,4,7,u>: Cost 1 vrev RHS
+ 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS
+ 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS
+ 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u>
+ 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2>
+ 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS
+ 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS
+ 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS
+ 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS
+ 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS
+ 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0>
+ 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS
+ 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5>
+ 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5>
+ 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
+ 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1>
+ 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5>
+ 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0>
+ 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS
+ 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS
+ 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5>
+ 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0>
+ 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7>
+ 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS
+ 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5>
+ 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3>
+ 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
+ 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
+ 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS
+ 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7>
+ 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5>
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5>
+ 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS
+ 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
+ 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7>
+ 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS
+ 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5>
+ 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS
+ 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3>
+ 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u>
+ 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3>
+ 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6>
+ 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5>
+ 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS
+ 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS
+ 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS
+ 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
+ 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5>
+ 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4>
+ 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5>
+ 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS
+ 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5>
+ 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
+ 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS
+ 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS
+ 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7>
+ 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2>
+ 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2>
+ 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS
+ 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS
+ 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
+ 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
+ 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS
+ 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS
+ 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6>
+ 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6>
+ 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6>
+ 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS
+ 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
+ 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7>
+ 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2>
+ 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2>
+ 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS
+ 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
+ 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
+ 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS
+ 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS
+ 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u>
+ 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2>
+ 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u>
+ 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS
+ 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS
+ 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0>
+ 1545470054U, // <u,6,0,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6>
+ 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0>
+ 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6>
+ 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u>
+ 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0>
+ 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS
+ 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1>
+ 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1>
+ 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0>
+ 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3>
+ 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6>
+ 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7>
+ 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
+ 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS
+ 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1>
+ 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS
+ 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2>
+ 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2>
+ 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1>
+ 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS
+ 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6>
+ 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7>
+ 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
+ 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
+ 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2>
+ 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3>
+ 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3>
+ 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3>
+ 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6>
+ 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6>
+ 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6>
+ 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS
+ 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS
+ 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS
+ 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3>
+ 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4>
+ 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u>
+ 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
+ 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6>
+ 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS
+ 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS
+ 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
+ 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
+ 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
+ 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
+ 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
+ 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
+ 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
+ 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
+ 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
+ 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
+ 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2>
+ 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3>
+ 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2>
+ 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS
+ 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3>
+ 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS
+ 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
+ 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS
+ 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS
+ 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS
+ 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS
+ 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS
+ 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS
+ 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS
+ 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS
+ 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
+ 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS
+ 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
+ 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0>
+ 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
+ 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0>
+ 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7>
+ 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2>
+ 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS
+ 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
+ 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
+ 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
+ 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7>
+ 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS
+ 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7>
+ 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7>
+ 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u>
+ 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7>
+ 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7>
+ 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2>
+ 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
+ 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
+ 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7>
+ 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7>
+ 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7>
+ 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u>
+ 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7>
+ 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
+ 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u>
+ 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u>
+ 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
+ 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
+ 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3>
+ 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3>
+ 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7>
+ 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
+ 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
+ 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7>
+ 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
+ 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4>
+ 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
+ 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS
+ 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0>
+ 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS
+ 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
+ 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
+ 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
+ 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
+ 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
+ 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
+ 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
+ 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
+ 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS
+ 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
+ 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
+ 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS
+ 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6>
+ 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
+ 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
+ 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
+ 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
+ 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7>
+ 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2>
+ 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7>
+ 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
+ 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7>
+ 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7>
+ 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS
+ 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS
+ 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2>
+ 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS
+ 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0>
+ 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1>
+ 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6>
+ 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS
+ 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7>
+ 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS
+ 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS
+ 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS
+ 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS
+ 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2>
+ 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2>
+ 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5>
+ 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0>
+ 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2>
+ 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0>
+ 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS
+ 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2>
+ 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS
+ 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS
+ 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3>
+ 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS
+ 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7>
+ 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1>
+ 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3>
+ 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS
+ 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS
+ 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2>
+ 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS
+ 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6>
+ 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7>
+ 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3>
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS
+ 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
+ 120371557U, // <u,u,3,2>: Cost 1 vrev LHS
+ 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS
+ 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS
+ 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7>
+ 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
+ 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS
+ 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS
+ 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS
+ 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4>
+ 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6>
+ 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4>
+ 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS
+ 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS
+ 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
+ 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6>
+ 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS
+ 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS
+ 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
+ 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5>
+ 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7>
+ 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS
+ 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS
+ 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS
+ 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS
+ 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS
+ 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS
+ 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2>
+ 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
+ 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7>
+ 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS
+ 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6>
+ 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7>
+ 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7>
+ 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+ 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+ 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+ 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+ 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+ 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+ 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+ 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+ 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
0
};
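The table that ends above is machine-generated data: one 32-bit entry per 4-lane shuffle mask, where each lane labeled in the comments selects element 0-7 of the two inputs (LHS/RHS) or is 'u' for an undef lane, giving 9^4 = 6561 entries plus the trailing zero sentinel visible just before the closing brace. As context only (this sketch is not part of the diff, and the helper name is hypothetical), a table laid out this way is naturally indexed in base 9 by the four lane selectors:

    // Hypothetical sketch: base-9 indexing for a 9^4-entry perfect-shuffle table.
    // Lane[i] is the source element (0-7) chosen for result lane i, or 8 for an
    // undef ('u') lane, matching the <a,b,c,d> labels in the generated comments.
    static unsigned perfectShuffleIndex(const unsigned Lane[4]) {
      return Lane[0] * 9 * 9 * 9 + Lane[1] * 9 * 9 + Lane[2] * 9 + Lane[3];
    }
    // Example: mask <7,6,u,6> maps to index 7*729 + 6*81 + 8*9 + 6 = 5667.
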
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
index d5bc3f6..ad51bc1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 305b232..22d15b5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -201,6 +201,10 @@ def CPSR : ARMReg<0, "cpsr">;
def FPSCR : ARMReg<1, "fpscr">;
def ITSTATE : ARMReg<2, "itstate">;
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
// Register classes.
//
// pc == Program Counter
@@ -256,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
-// or SP (R13 or R15) are used. The ARM ARM refers to these operands
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
R7, R8, R9, R10, R11, R12, LR]> {
@@ -381,27 +385,29 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // VFP2
+ // VFP2 / VFPv3-D16
static const unsigned ARM_DPR_VFP2[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
- // VFP3
+ // VFP3: D8-D15 are callee saved and should be allocated last.
+ // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
static const unsigned ARM_DPR_VFP3[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
- ARM::D12, ARM::D13, ARM::D14, ARM::D15,
ARM::D16, ARM::D17, ARM::D18, ARM::D19,
ARM::D20, ARM::D21, ARM::D22, ARM::D23,
ARM::D24, ARM::D25, ARM::D26, ARM::D27,
- ARM::D28, ARM::D29, ARM::D30, ARM::D31 };
+ ARM::D28, ARM::D29, ARM::D30, ARM::D31,
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+ ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
+
DPRClass::iterator
DPRClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3())
+ if (Subtarget.hasVFP3() && !Subtarget.hasD16())
return ARM_DPR_VFP3;
return ARM_DPR_VFP2;
}
@@ -410,7 +416,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
DPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3())
+ if (Subtarget.hasVFP3() && !Subtarget.hasD16())
return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
else
return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
@@ -438,6 +444,29 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
let SubRegClasses = [(DPR dsub_0, dsub_1)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Q4-Q7 are callee saved and should be allocated last.
+ // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QPR[] = {
+ ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+ ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
+ ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 };
+
+ QPRClass::iterator
+ QPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QPR;
+ }
+
+ QPRClass::iterator
+ QPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
+ }
+ }];
}
// Subset of QPR that have 32-bit SPR subregs.
@@ -463,6 +492,27 @@ def QQPR : RegisterClass<"ARM", [v4i64],
[QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // QQ2-QQ3 are callee saved and should be allocated last.
+ // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QQPR[] = {
+ ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
+ ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
+
+ QQPRClass::iterator
+ QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QQPR;
+ }
+
+ QQPRClass::iterator
+ QQPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
+ }
+ }];
}
// Subset of QQPR that have 32-bit SPR subregs.
@@ -483,6 +533,26 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // QQQQ1 is callee saved and should be allocated last.
+ // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
+ static const unsigned ARM_QQQQPR[] = {
+ ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
+
+ QQQQPRClass::iterator
+ QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return ARM_QQQQPR;
+ }
+
+ QQQQPRClass::iterator
+ QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
+ return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
+ }
+ }];
}
// Condition code registers.
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
index b60ccca..958c5c6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -14,42 +14,86 @@ def IIC_iALUx : InstrItinClass;
def IIC_iALUi : InstrItinClass;
def IIC_iALUr : InstrItinClass;
def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
def IIC_iUNAr : InstrItinClass;
def IIC_iUNAsi : InstrItinClass;
-def IIC_iUNAsr : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
def IIC_iCMPi : InstrItinClass;
def IIC_iCMPr : InstrItinClass;
def IIC_iCMPsi : InstrItinClass;
def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
def IIC_iMOVi : InstrItinClass;
def IIC_iMOVr : InstrItinClass;
def IIC_iMOVsi : InstrItinClass;
def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
def IIC_iCMOVi : InstrItinClass;
def IIC_iCMOVr : InstrItinClass;
def IIC_iCMOVsi : InstrItinClass;
def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
def IIC_iMUL16 : InstrItinClass;
def IIC_iMAC16 : InstrItinClass;
def IIC_iMUL32 : InstrItinClass;
def IIC_iMAC32 : InstrItinClass;
def IIC_iMUL64 : InstrItinClass;
def IIC_iMAC64 : InstrItinClass;
-def IIC_iLoadi : InstrItinClass;
-def IIC_iLoadr : InstrItinClass;
-def IIC_iLoadsi : InstrItinClass;
-def IIC_iLoadiu : InstrItinClass;
-def IIC_iLoadru : InstrItinClass;
-def IIC_iLoadsiu : InstrItinClass;
-def IIC_iLoadm : InstrItinClass;
-def IIC_iStorei : InstrItinClass;
-def IIC_iStorer : InstrItinClass;
-def IIC_iStoresi : InstrItinClass;
-def IIC_iStoreiu : InstrItinClass;
-def IIC_iStoreru : InstrItinClass;
-def IIC_iStoresiu : InstrItinClass;
-def IIC_iStorem : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
@@ -80,19 +124,76 @@ def IIC_fpSQRT32 : InstrItinClass;
def IIC_fpSQRT64 : InstrItinClass;
def IIC_fpLoad32 : InstrItinClass;
def IIC_fpLoad64 : InstrItinClass;
-def IIC_fpLoadm : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
def IIC_fpStore32 : InstrItinClass;
def IIC_fpStore64 : InstrItinClass;
-def IIC_fpStorem : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
def IIC_VLD4 : InstrItinClass;
-def IIC_VST : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
def IIC_VUNAD : InstrItinClass;
def IIC_VUNAQ : InstrItinClass;
def IIC_VBIND : InstrItinClass;
def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
def IIC_VMOVImm : InstrItinClass;
def IIC_VMOVD : InstrItinClass;
def IIC_VMOVQ : InstrItinClass;
@@ -101,6 +202,7 @@ def IIC_VMOVID : InstrItinClass;
def IIC_VMOVISL : InstrItinClass;
def IIC_VMOVSI : InstrItinClass;
def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
def IIC_VPERMD : InstrItinClass;
def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
@@ -152,7 +254,7 @@ def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
-def GenericItineraries : ProcessorItineraries<[], []>;
+def GenericItineraries : ProcessorItineraries<[], [], []>;
include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
index 282abca..8d86c01 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -14,18 +14,17 @@
//
// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
// Functional Units.
-def A8_Issue : FuncUnit; // issue
def A8_Pipe0 : FuncUnit; // pipeline 0
def A8_Pipe1 : FuncUnit; // pipeline 1
-def A8_LdSt0 : FuncUnit; // pipeline 0 load/store
-def A8_LdSt1 : FuncUnit; // pipeline 1 load/store
+def A8_LSPipe : FuncUnit; // Load / store pipeline
def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
def A8_NLSPipe : FuncUnit; // NEON LS pipe
//
// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
//
def CortexA8Itineraries : ProcessorItineraries<
- [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
+ [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
+ [], [
// Two fully-pipelined integer ALU pipelines
//
// No operand cycles
@@ -35,12 +34,23 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ //
// Unary Instructions that produce a result
InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
//
// Compare instructions
InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
@@ -48,124 +58,184 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ //
// Move instructions, unconditional
InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LSPipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
// Integer multiply pipeline
// Result written in E5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
- InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
- InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
- InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
- InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
- InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
// Integer load pipeline
//
- // loads have an extra cycle of latency, but are fully pipelined
- // use A8_Issue to enforce the 1 load/store per cycle limit
- //
// Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
//
// Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+ // FIXME: lsl by 2 takes 1 cycle.
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
- //
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>]>,
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple, def is the 5th operand. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 2, 1, 1, 3]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
+ //
+  // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
+ InstrStage<3, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
+ [1, 1, 3]>,
- // Integer store pipeline
//
- // use A8_Issue to enforce the 1 load/store per cycle limit
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
+
+
+ // Integer store pipeline
//
// Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1]>,
//
// Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
- //
- // Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>]>,
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple. Pipeline 0 only.
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
+ InstrStage<2, [A8_LSPipe]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
// Branch
//
@@ -178,440 +248,786 @@ def CortexA8Itineraries : ProcessorItineraries<
// possible.
//
// FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe]>], [20]>,
//
// Single-precision FP Unary
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<4, [A8_NPipe], 0>,
InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
//
// Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<4, [A8_NPipe], 0>,
InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
//
// Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<7, [A8_NPipe], 0>,
InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
//
// Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<5, [A8_NPipe], 0>,
InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
//
// Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<8, [A8_NPipe], 0>,
InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
//
// Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 1]>,
//
// Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<8, [A8_NPipe], 0>,
InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
//
// Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<9, [A8_NPipe], 0>,
InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
//
// Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
//
// Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<11, [A8_NPipe], 0>,
InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
//
// Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
//
// Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
//
// Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<20, [A8_NPipe], 0>,
InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
//
// Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<29, [A8_NPipe], 0>,
InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
//
// Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
//
// Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<29, [A8_NPipe], 0>,
InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
+
+ //
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [2, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>],
+ [20, 20, 1]>,
+
//
// Single-precision FP Load
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
//
// Double-precision FP Load
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [2, 1]>,
//
// FP Load Multiple
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
- InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
//
// Single-precision FP Store
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
//
// Double-precision FP Store
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>],
+ [1, 1]>,
//
// FP Store Multiple
- // use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
- InstrStage<2, [A8_Pipe0], 0>,
- InstrStage<2, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>,
+ InstrStage<1, [A8_NLSPipe], 0>,
+ InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
//
// VLD1
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1]>,
+ //
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1]>,
+ //
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1, 1]>,
//
// VLD2
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 2, 2, 1, 1]>,
//
// VLD3
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
//
// VLD4
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [3, 3, 4, 4, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<5, [A8_NLSPipe], 0>,
+ InstrStage<5, [A8_LSPipe]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
//
- // VST
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0], 0>,
- InstrStage<1, [A8_NLSPipe]>]>,
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NLSPipe], 0>,
+ InstrStage<2, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<4, [A8_NLSPipe], 0>,
+ InstrStage<4, [A8_LSPipe]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
//
// Double-register FP Unary
- InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [5, 2]>,
//
// Quad-register FP Unary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [6, 2]>,
//
// Double-register FP Binary
- InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
//
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
+
+ //
// Quad-register FP Binary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
- InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
//
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [1, 1]>,
+ //
// Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3]>,
//
// Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
//
// Quad-register Permute Move
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 3 for those cases
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
//
// Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
//
// Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
//
// Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
//
// Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
//
// Integer to Lane Move
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
//
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [2, 1]>,
+ //
// Double-register Permute
- InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
//
// Quad-register Permute
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 3 for those cases
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
//
// Double-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
//
// Quad-register FP Multiple-Accumulate
// Result written in N9, but that is relative to the last cycle of multicycle,
// so we use 10 for those cases
- InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
//
   // Double-register Reciprocal Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
//
   // Quad-register Reciprocal Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
//
// Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Count
// Result written in N3, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
//
// Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2]>,
//
// Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2]>,
//
// Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 1]>,
//
   // Quad-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 1]>,
//
// Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
//
// Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
//
// Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
//
// Double-register Integer Subtract
- InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Subtract
- InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
//
// Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
//
// Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
//
// Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
//
// Double-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
//
// Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
//
// Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
//
// Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
//
// Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>,
InstrStage<2, [A8_NLSPipe], 0>,
InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>,
InstrStage<2, [A8_NLSPipe], 0>,
InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
//
// Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
//
// Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
//
// VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
- InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
//
// VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
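
Each InstrItinData above pairs a list of InstrStage reservations with a per-operand cycle list; a stage's trailing 0 means the next stage starts in the same cycle, which is how these entries overlap an A8_Pipe0/A8_Pipe1 issue slot with the A8_LSPipe or NEON pipes. The toy C++ model below is an illustration only; the field names and the assumed default next-cycle value are not the TableGen InstrStage definition.

    #include <cstdio>
    #include <vector>

    struct Stage {
      int Cycles;                        // cycles the chosen unit stays reserved
      std::vector<const char *> Units;   // candidate functional units
      int NextCycle;                     // cycles before the next stage may start
    };

    // Cycles consumed at issue by a stage list; a stage with NextCycle == 0
    // overlaps the stage that follows it.
    int issueLength(const std::vector<Stage> &Stages) {
      int Length = 0;
      for (const Stage &S : Stages)
        Length += S.NextCycle;
      return Length;
    }

    int main() {
      // Rough transcription of the IIC_iLoad_i entry above:
      //   InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_LSPipe]>
      std::vector<Stage> Load = {
        { 1, { "A8_Pipe0", "A8_Pipe1" }, 0 },  // issue slot, overlapped
        { 1, { "A8_LSPipe" }, 1 },             // load/store pipe; assumed NextCycle == Cycles
      };
      std::printf("issue occupies %d cycle(s)\n", issueLength(Load));
      return 0;
    }
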
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
index df2f896..82c6735 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -16,130 +16,417 @@
// Reference Manual".
//
// Functional units
-def A9_Pipe0 : FuncUnit; // pipeline 0
-def A9_Pipe1 : FuncUnit; // pipeline 1
-def A9_LSPipe : FuncUnit; // LS pipe
-def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
+def A9_Issue0 : FuncUnit; // Issue 0
+def A9_Issue1 : FuncUnit; // Issue 1
+def A9_Branch : FuncUnit; // Branch
+def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0
+def A9_ALU1 : FuncUnit; // ALU pipeline 1
+def A9_AGU : FuncUnit; // Address generation unit for ld / st
+def A9_NPipe : FuncUnit; // NEON pipeline
+def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
+def A9_LSUnit : FuncUnit; // L/S Unit
def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
def A9_DRegsN : FuncUnit; // FP register set, NEON side
-// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
-//
+// Bypasses
+def A9_LdBypass : Bypass;
+
def CortexA9Itineraries : ProcessorItineraries<
- [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
+ [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
+ A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
+ [A9_LdBypass], [
// Two fully-pipelined integer ALU pipelines
- // FIXME: There are no operand latencies for these instructions at all!
+
//
// Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [5]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1]>,
//
// No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [3, 1, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
//
// Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+
+ // CLZ, RBIT, etc.
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+
+ // BFC, BFI, UBFX, SBFX
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
+ InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
+ InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>],
+ [1, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>],
+ [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
+ //
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
//
// Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ // FIXME: Correctly model the extra input dep on the destination.
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>,
+ InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
// Integer multiply pipeline
//
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
- InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
- InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<2, [A9_ALU0]>],
+ [4, 1, 1, 1]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<3, [A9_ALU0]>],
+ [4, 5, 1, 1]>,
// Integer load pipeline
// FIXME: The timings are some rough approximations
//
// Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1], [A9_LdBypass]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1], [A9_LdBypass]>,
//
// Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset
- InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit], 0>],
+ [4, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 1, 1], [A9_LdBypass]>,
//
// Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1], [A9_LdBypass]>,
//
// Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset with update
- InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [5, 4, 1, 1], [A9_LdBypass]>,
+ //
+ // Load multiple, def is the 5th operand.
+ // FIXME: This assumes 3 to 4 registers.
+ InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 2, 1, 1, 3],
+ [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<2, [A9_LSUnit]>,
+ InstrStage<1, [A9_Branch]>],
+ [1, 1, 3],
+ [NoBypass, NoBypass, A9_LdBypass]>,
+
//
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>]>,
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>,
+ InstrStage<1, [A9_ALU0, A9_ALU1]>],
+ [2, 1]>,
// Integer store pipeline
  //
// Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+ // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1]>,
//
// Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
//
// Scaled register offset
- InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
//
// Scaled register offset with update
- InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_AGU], 1>,
+ InstrStage<1, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>]>,
+ InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_AGU], 0>,
+ InstrStage<2, [A9_LSUnit]>], [2]>,
+
+ //
+ // Preload
+ InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+ InstrStage<1, [A9_Issue1], 0>,
+ InstrStage<1, [A9_Branch]>]>,
  // VFP and NEON share the same register file. This means that every VFP
// instruction should wait for full completion of the consecutive NEON
@@ -159,687 +446,1379 @@ def CortexA9Itineraries : ProcessorItineraries<
// Issue through integer pipeline, and execute in NEON unit.
// FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1]>,
//
// Single-precision FP Unary
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Single to Half FP Convert
- InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Half to Single FP Convert
- InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
//
// Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
//
// Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
//
// Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<6, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
//
// Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<7, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 1, 1]>,
//
// Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<9, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
//
// Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<10, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
//
// Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<16, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
+ InstrStage<10, [A9_NPipe]>],
+ [15, 1, 1]>,
//
// Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<26, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
+ InstrStage<20, [A9_NPipe]>],
+ [25, 1, 1]>,
//
// Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<18, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<13, [A9_NPipe]>], [17, 1]>,
+ InstrStage<13, [A9_NPipe]>],
+ [17, 1]>,
//
// Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<33, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<28, [A9_NPipe]>], [32, 1]>,
+ InstrStage<28, [A9_NPipe]>],
+ [32, 1]>,
//
// Integer to Single-precision Move
- InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Integer to Double-precision Move
- InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
//
// Single-precision to Integer Move
- InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
//
// Double-precision to Integer Move
- InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
//
// Single-precision FP Load
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
//
// Double-precision FP Load
- InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ // FIXME: Result latency is 1 if address is 64-bit aligned.
+ InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
//
// FP Load Multiple
- InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
//
// Single-precision FP Store
- InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
//
// Double-precision FP Store
- InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
//
// FP Store Multiple
- InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<2, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
// NEON
- // Issue through integer pipeline, and execute in NEON unit.
- // FIXME: Neon pipeline and LdSt unit are multiplexed.
- // Add some syntactic sugar to model this!
// VLD1
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+  // FIXME: Conservatively assume insufficient alignment.
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1]>,
+ // VLD1x2
+ InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ // VLD1x3
+ InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 1]>,
+ // VLD1x4
+ InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
+ // VLD1u
+ InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 1]>,
+ // VLD1x2u
+ InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 2, 1]>,
+ // VLD1x3u
+ InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 2, 1]>,
+ // VLD1x4u
+ InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1]>,
+ //
+ // VLD1ln
+ InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 1, 1, 1]>,
+ //
+ // VLD1lnu
+ InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 2, 1, 1, 1, 1]>,
+ //
+ // VLD1dup
+ InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1]>,
+ //
+ // VLD1dupu
+ InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1]>,
//
// VLD2
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 6 cycles
- InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1]>,
+ //
+ // VLD2x2
+ InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 4, 3, 4, 1]>,
+ //
+ // VLD2ln
+ InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 4, 1, 1, 1, 1]>,
+ //
+ // VLD2u
+ InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1]>,
+ //
+ // VLD2x2u
+ InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 4, 3, 4, 2, 1]>,
+ //
+ // VLD2lnu
+ InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [4, 4, 2, 1, 1, 1, 1, 1]>,
+ //
+ // VLD2dup
+ InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1]>,
+ //
+ // VLD2dupu
+ InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1]>,
//
// VLD3
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 6 cycles
- InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 1]>,
+ //
+ // VLD3ln
+ InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3u
+ InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 2, 1]>,
+ //
+ // VLD3lnu
+ InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
//
// VLD4
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 6 cycles
- InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
- //
- // VST
- // FIXME: We don't model this instruction properly
- InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 6 cycles
- InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1], 0>,
- InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_NPipe]>]>,
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1]>,
+ //
+ // VLD4ln
+ InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4u
+ InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1]>,
+ //
+ // VLD4lnu
+ InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+ InstrStage<5, [A9_NPipe], 0>,
+ InstrStage<5, [A9_LSUnit]>],
+ [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VLD4dup
+ InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
+ //
+ // VLD4dupu
+ InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1, 1]>,
+ //
+ // VST1
+ InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1x2
+ InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST1x3
+ InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST1x4
+ InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1u
+ InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST1x2u
+ InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST1x3u
+ InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST1x4u
+ InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST1ln
+ InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1]>,
+ //
+ // VST1lnu
+ InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1]>,
+ //
+ // VST2
+ InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2x2
+ InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2u
+ InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST2x2u
+ InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST2ln
+ InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 1, 1]>,
+ //
+ // VST2lnu
+ InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1]>,
+ //
+ // VST3
+ InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3u
+ InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST3ln
+ InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2]>,
+ //
+ // VST3lnu
+ InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2]>,
+ //
+ // VST4
+ InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4u
+ InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4ln
+ InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [1, 1, 1, 1, 2, 2]>,
+ //
+ // VST4lnu
+ InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [2, 1, 1, 1, 1, 1, 2, 2]>,
+
//
// Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
//
// Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2]>,
//
// Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Quad-register Integer CountQ-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1]>,
//
// Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
//
// Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
//
// Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
//
// Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 1]>,
//
// Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
//
// Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 1, 1]>,
//
// Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
//
// Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 2]>,
//
// Double-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
//
// Quad-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [4, 2, 1]>,
//
// Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 2, 2]>,
//
// Quad-register Integer Count
// Result written in N3, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
+ InstrStage<2, [A9_NPipe]>],
+ [4, 2, 2]>,
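// (Reading of the comment above: the A9_NPipe stage here occupies 2 cycles, so a
// result written in N3 relative to its final cycle becomes available 4 cycles
// after issue, which is the 4 at the head of the latency list.)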
//
// Double-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
//
// Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 1]>,
//
// Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 2]>,
//
// Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 2]>,
//
// Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 2, 1]>,
//
// Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
+ InstrStage<2, [A9_NPipe]>],
+ [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
+ InstrStage<4, [A9_NPipe]>],
+ [9, 3, 2, 1]>,
+
+ //
+ // Move
+ InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [1,1]>,
//
// Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [3]>,
+ InstrStage<1, [A9_NPipe]>],
+ [3]>,
//
// Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [2, 1]>,
+ InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
//
// Quad-register Permute Move
- // Result written in N2, but that is relative to the last cycle of multicycle,
- // so we use 3 for those cases
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
- InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 1]>,
+ InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
//
// Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
+ InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1]>,
//
// Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
+ InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [1, 1, 1]>,
//
// Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
+ InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1]>,
//
// Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
+ InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1]>,
//
// Integer to Lane Move
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
- // FIXME: all latencies are arbitrary, no information is available
+ InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 1]>,
//
+ // Vector narrow move
+ InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [3, 1]>,
+ //
// Double-register FP Unary
- InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [5, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2]>,
//
// Quad-register FP Unary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 2]>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2]>,
//
// Double-register FP Binary
// FIXME: We're using this itin for many instructions and [2, 2] here is too
// optimistic.
- InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 7 cycles
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 2]>,
+
+ //
+ // VPADD, etc.
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 1, 1]>,
+ //
+ // Double-register FP VMUL
+ InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [5, 2, 1]>,
//
// Quad-register FP Binary
// Result written in N5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
// FIXME: We're using this itin for many instructions and [2, 2] here is too
// optimistic.
- InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 8 cycles
+ InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 2, 2]>,
+ //
+ // Quad-register FP VMUL
+ InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+ InstrStage<1, [A9_NPipe]>],
+ [6, 2, 1]>,
//
// Double-register FP Multiply-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
//
// Quad-register FP Multiply-Accumulate
// Result written in N9, but that is relative to the last cycle of multicycle,
// so we use 10 for those cases
- InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
//
// Double-register Reciprocal Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 7 cycles
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+ InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 10 cycles
+ InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [9, 2, 2]>,
//
// Quad-register Reciprocal Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 9 cycles
- InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
+ InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 11 cycles
+ InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [10, 2, 2]>,
//
// Double-register Permute
- InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 2, 1, 1]>,
//
// Quad-register Permute
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 3 for those cases
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
// Result written in N2, but that is relative to the last cycle of multicycle,
// so we use 4 for those cases
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 4, 1, 1]>,
//
// Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 7 cycles
+ InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+ InstrStage<1, [A9_NPipe]>],
+ [2, 1, 1]>,
//
// Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
- // Extra latency cycles since wbck is 9 cycles
+ InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2]>,
//
// VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 1]>,
+ InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
- InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 2, 2, 1]>,
+ InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 2, 2, 3, 3, 1]>,
//
// VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 1]>,
+ InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<2, [A9_NPipe]>],
+ [3, 1, 2, 2, 1]>,
+ InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<3, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 1]>,
+ InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<2, [A9_NPipe]>],
+ [4, 1, 2, 2, 3, 3, 1]>
]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
index 08b560c..c1880a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -19,7 +19,7 @@ def V6_Pipe : FuncUnit; // pipeline
// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
//
def ARMV6Itineraries : ProcessorItineraries<
- [V6_Pipe], [
+ [V6_Pipe], [], [
//
// No operand cycles
InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>,
@@ -30,10 +30,20 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
//
+ // Bitwise Instructions that produce a result
+ InstrItinData<IIC_iBITi , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iBITr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+ InstrItinData<IIC_iBITsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iBITsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+ //
// Unary Instructions that produce a result
InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
+ // Zero and sign extension instructions
+ InstrItinData<IIC_iEXTr , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+ InstrItinData<IIC_iEXTAr , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iEXTAsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
//
// Compare instructions
InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>,
@@ -41,17 +51,39 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
//
+ // Test instructions
+ InstrItinData<IIC_iTSTi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iTSTr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iTSTsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iTSTsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ //
// Move instructions, unconditional
InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>,
InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+ InstrItinData<IIC_iMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3]>,
+ InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [5]>,
//
// Move instructions, conditional
InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>,
InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [4]>,
+ //
+ // MVN instructions
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [V6_Pipe]>], [2]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
// Integer multiply pipeline
//
@@ -65,50 +97,90 @@ def ARMV6Itineraries : ProcessorItineraries<
// Integer load pipeline
//
// Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
//
// Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+ //
+ // Load multiple, def is the 5th operand.
+ InstrItinData<IIC_iLoad_m , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+ //
+ // Load multiple + update, defs are the 1st and 5th operands.
+ InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+ //
+ // Load multiple plus branch
+ InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+ //
+ // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [3, 1]>,
//
- // Load multiple
- InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
+ // Pop, def is the 3rd operand.
+ InstrItinData<IIC_iPop , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+ //
+ // Pop + branch, def is the 3rd operand.
+ InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+ InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,
// Integer store pipeline
//
// Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+ InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
//
// Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
-
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
//
// Store multiple
- InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>,
+ InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+ //
+ // Store multiple + update
+ InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
// Branch
//
@@ -183,6 +255,18 @@ def ARMV6Itineraries : ProcessorItineraries<
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
//
+ // Integer to Single-precision Move
+ InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Integer to Double-precision Move
+ InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+ //
+ // Single-precision to Integer Move
+ InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+ //
+ // Double-precision to Integer Move
+ InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+ //
// Single-precision FP Load
InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
@@ -190,7 +274,10 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
//
// FP Load Multiple
- InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>,
+ InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+ //
+ // FP Load Multiple + update
+ InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
@@ -200,5 +287,8 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
//
// FP Store Multiple
- InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]>
+ InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+ //
+ // FP Store Multiple + update
+ InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a289407..2b9202b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -29,10 +29,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -66,7 +64,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -77,7 +76,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
DstOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
@@ -103,7 +103,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -125,7 +125,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, false, false, 0);
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index d7d00c2..7533690 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -33,10 +33,8 @@ public:
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
};
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index cb539f4..0bd740c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,6 +13,7 @@
#include "ARMSubtarget.h"
#include "ARMGenSubtarget.inc"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -24,45 +25,52 @@ ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
static cl::opt<bool>
-UseMOVT("arm-use-movt",
- cl::init(true), cl::Hidden);
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+ cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4)
+ , ARMProcFamily(Others)
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
- , SlowVMLx(false)
+ , SlowFPVMLx(false)
, SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
, NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
- , UseMovt(UseMOVT)
+ , UseMovt(false)
, HasFP16(false)
+ , HasD16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
+ , HasMPExtension(false)
, FPOnlySP(false)
+ , AllowsUnalignedMem(false)
, stackAlignment(4)
, CPUString("generic")
- , TargetType(isELF) // Default to ELF unless otherwise specified.
+ , TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
- // default to soft float ABI
+ // Default to soft float ABI
if (FloatABIType == FloatABI::Default)
FloatABIType = FloatABI::Soft;
// Determine default and user specified characteristics
- // Parse features string.
- CPUString = ParseSubtargetFeatures(FS, CPUString);
-
// When no arch is specified either by CPU or by attributes, make the default
// ARMv4T.
- if (CPUString == "generic" && (FS.empty() || FS == "generic"))
+ const char *ARMArchFeature = "";
+ if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
ARMArchVersion = V4T;
+ ARMArchFeature = ",+v4t";
+ }
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
@@ -80,47 +88,78 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
ARMArchVersion = V7A;
- if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ ARMArchFeature = ",+v7a";
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
ARMArchVersion = V7M;
+ ARMArchFeature = ",+v7m";
+ }
} else if (SubVer == '6') {
ARMArchVersion = V6;
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = ",+v6";
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
ARMArchVersion = V6T2;
+ ARMArchFeature = ",+v6t2";
+ }
} else if (SubVer == '5') {
ARMArchVersion = V5T;
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = ",+v5t";
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
ARMArchVersion = V5TE;
+ ARMArchFeature = ",+v5te";
+ }
} else if (SubVer == '4') {
- if (Len >= Idx+2 && TT[Idx+1] == 't')
+ if (Len >= Idx+2 && TT[Idx+1] == 't') {
ARMArchVersion = V4T;
- else
+ ARMArchFeature = ",+v4t";
+ } else {
ARMArchVersion = V4;
+ ARMArchFeature = "";
+ }
}
}
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ // Parse features string. If the first entry in FS (the CPU) is missing,
+ // insert the architecture feature derived from the target triple. This is
+ // important for setting features that are implied based on the architecture
+ // version.
+ std::string FSWithArch;
+ if (FS.empty())
+ FSWithArch = std::string(ARMArchFeature);
+ else if (FS.find(',') == 0)
+ FSWithArch = std::string(ARMArchFeature) + FS;
+ else
+ FSWithArch = FS;
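+  // e.g. with an "armv7a" triple and an empty FS, FSWithArch is ",+v7a" here,
+  // so features implied by the architecture version still get applied.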
+ CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+
+ // After parsing Itineraries, set ItinData.IssueWidth.
+ computeIssueWidth();
+
// Thumb2 implies at least V6T2.
if (ARMArchVersion >= V6T2)
ThumbMode = Thumb2;
else if (ThumbMode >= Thumb2)
ARMArchVersion = V6T2;
- if (Len >= 10) {
- if (TT.find("-darwin") != std::string::npos)
- // arm-darwin
- TargetType = isDarwin;
- }
-
- if (TT.find("eabi") != std::string::npos)
- TargetABI = ARM_ABI_AAPCS;
-
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetDarwin())
+ if (!isTargetDarwin())
+ UseMovt = hasV6T2Ops();
+ else {
IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+ UseMovt = DarwinUseMOVT && hasV6T2Ops();
+ }
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+ AllowsUnalignedMem = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -163,7 +202,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
// through a stub.
if (!isDecl && !GV->isWeakForLinker())
return false;
-
+
// Unless we have a symbol with hidden visibility, we have to go through a
// normal $non_lazy_ptr stub because this symbol might be resolved late.
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
@@ -174,6 +213,34 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
return false;
}
+unsigned ARMSubtarget::getMispredictionPenalty() const {
+ // If we have a reasonable estimate of the pipeline depth, then we can
+ // estimate the penalty of a misprediction based on that.
+ if (isCortexA8())
+ return 13;
+ else if (isCortexA9())
+ return 8;
+
+ // Otherwise, just return a sensible default.
+ return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+ unsigned allStage1Units = 0;
+ for (const InstrItinerary *itin = InstrItins.Itineraries;
+ itin->FirstStage != ~0U; ++itin) {
+ const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+ allStage1Units |= IS->getUnits();
+ }
+ InstrItins.IssueWidth = 0;
+ while (allStage1Units) {
+ ++InstrItins.IssueWidth;
+ // clear the lowest bit
+ allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+ }
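+  // IssueWidth now equals the number of distinct stage-1 functional units
+  // referenced by the itineraries (the loop is a population count of
+  // allStage1Units), which the assertion below bounds at two.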
+ assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 67e5803..76c1c3f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -17,7 +17,7 @@
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
+#include "llvm/ADT/Triple.h"
#include <string>
namespace llvm {
@@ -29,6 +29,10 @@ protected:
V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
+ enum ARMProcFamilyEnum {
+ Others, CortexA8, CortexA9
+ };
+
enum ARMFPEnum {
None, VFPv2, VFPv3, NEON
};
@@ -42,6 +46,9 @@ protected:
/// V6, V6T2, V7A, V7M.
ARMArchEnum ARMArchVersion;
+ /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+ ARMProcFamilyEnum ARMProcFamily;
+
/// ARMFPUType - Floating Point Unit type.
ARMFPEnum ARMFPUType;
@@ -50,9 +57,9 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
- /// SlowVMLx - If the VFP2 instructions are available, indicates whether
- /// the VML[AS] instructions are slow (if so, don't use them).
- bool SlowVMLx;
+ /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+ /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+ bool SlowFPVMLx;
/// SlowFPBrcc - True if floating point compare + branch is slow.
bool SlowFPBrcc;
@@ -80,6 +87,10 @@ protected:
/// only so far)
bool HasFP16;
+ /// HasD16 - True if subtarget is limited to 16 double precision
+ /// FP registers for VFPv3.
+ bool HasD16;
+
/// HasHardwareDivide - True if subtarget supports [su]div
bool HasHardwareDivide;
@@ -95,10 +106,19 @@ protected:
/// over 16-bit ones.
bool Pref32BitThumb;
+ /// HasMPExtension - True if the subtarget supports Multiprocessing
+ /// extension (ARMv7 only).
+ bool HasMPExtension;
+
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
+ /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// accesses for some types. For details, see
+ /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+ bool AllowsUnalignedMem;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -106,6 +126,9 @@ protected:
/// CPUString - String name of used CPU.
std::string CPUString;
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
@@ -136,6 +159,8 @@ protected:
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
+ void computeIssueWidth();
+
bool hasV4TOps() const { return ARMArchVersion >= V4T; }
bool hasV5TOps() const { return ARMArchVersion >= V5T; }
bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
@@ -143,6 +168,9 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+ bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
bool hasARMOps() const { return !NoARM; }
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -153,15 +181,17 @@ protected:
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
- bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+ bool useFPVMLx() const { return !SlowFPVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool hasMPExtension() const { return HasMPExtension; }
bool hasFP16() const { return HasFP16; }
+ bool hasD16() const { return HasD16; }
- bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const { return TargetType == isELF; }
+ bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+ bool isTargetELF() const { return !isTargetDarwin(); }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
@@ -175,8 +205,12 @@ protected:
bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+ bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
const std::string & getCPUString() const { return CPUString; }
+ unsigned getMispredictionPenalty() const;
+
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 30ff827..0ee773b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -12,15 +12,18 @@
#include "ARMTargetMachine.h"
#include "ARMMCAsmInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMFrameLowering.h"
#include "ARM.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
+static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
+
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -31,6 +34,26 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ llvm_unreachable("ARM does not support Windows COFF format");
+ return NULL;
+ default:
+ return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+ }
+}
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -39,6 +62,19 @@ extern "C" void LLVMInitializeARMTarget() {
// Register the target asm info.
RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
}
/// TargetMachine ctor - Create an ARM architecture model.
@@ -49,9 +85,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
bool isThumb)
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, isThumb),
- FrameInfo(Subtarget),
JITInfo(),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ InstrItins(Subtarget.getInstrItineraryData())
+{
DefRelocModel = getRelocationModel();
}
@@ -59,12 +95,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"v128:64:128-v64:64:64-n32")),
+ ELFWriterInfo(*this),
TLInfo(*this),
- TSInfo(*this) {
+ TSInfo(*this),
+ FrameLowering(Subtarget) {
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
"support ARM mode execution!");
@@ -77,14 +115,18 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ std::string("e-p:32:32-f64:32:64-i64:32:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:32:128-v64:32:64-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-"
"v128:64:128-v64:64:64-a:0:32-n32")),
+ ELFWriterInfo(*this),
TLInfo(*this),
- TSInfo(*this) {
+ TSInfo(*this),
+ FrameLowering(Subtarget.hasThumb2()
+ ? new ARMFrameLowering(Subtarget)
+ : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
// Pass Pipeline Configuration
@@ -104,12 +146,12 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (Subtarget.hasNEON())
- PM.add(createNEONPreAllocPass());
-
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
+ if (ExpandMLx &&
+ OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+ PM.add(createMLxExpansionPass());
return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 17e5425..e0aa149 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -14,16 +14,19 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "ARMInstrInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
#include "ARMJITInfo.h"
#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
#include "ARMSelectionDAGInfo.h"
#include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
#include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/OwningPtr.h"
namespace llvm {
@@ -31,9 +34,7 @@ namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
protected:
ARMSubtarget Subtarget;
-
private:
- ARMFrameInfo FrameInfo;
ARMJITInfo JITInfo;
InstrItineraryData InstrItins;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -42,11 +43,10 @@ public:
ARMBaseTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool isThumb);
- virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData getInstrItineraryData() const {
- return InstrItins;
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
}
// Pass Pipeline Configuration
@@ -64,9 +64,11 @@ public:
class ARMTargetMachine : public ARMBaseTargetMachine {
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
-public:
+ ARMFrameLowering FrameLowering;
+ public:
ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -81,9 +83,15 @@ public:
virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
};
/// ThumbTargetMachine - Thumb target machine.
@@ -94,8 +102,11 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
+ ARMELFWriterInfo ELFWriterInfo;
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ OwningPtr<ARMFrameLowering> FrameLowering;
public:
ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -117,7 +128,14 @@ public:
virtual const ARMBaseInstrInfo *getInstrInfo() const {
return InstrInfo.get();
}
+ /// returns either Thumb1FrameLowering or ARMFrameLowering
+ virtual const ARMFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index 091a3b3..7535da5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace dwarf;
@@ -26,14 +27,20 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
StaticCtorSection =
- getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
- MCSectionELF::SHF_WRITE |
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
SectionKind::getDataRel());
StaticDtorSection =
- getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
- MCSectionELF::SHF_WRITE |
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
SectionKind::getDataRel());
}
+
+ AttributesSection =
+ getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index 097fc2c..c6a7261 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -18,10 +18,19 @@ class MCContext;
class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+ const MCSection *AttributesSection;
public:
- ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {}
+ ARMElfTargetObjectFile() :
+ TargetLoweringObjectFileELF(),
+ AttributesSection(NULL)
+ {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *getAttributesSection() const {
+ return AttributesSection;
+ }
};
} // end namespace llvm
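
The new getAttributesSection() hook simply exposes the .ARM.attributes section created in Initialize() above. As a rough, hedged sketch (not part of the patch; the helper and the streamer variable are assumptions), an object emitter could position itself on that section before writing build-attribute tags:

    // Illustration only: switch the streamer to .ARM.attributes when available,
    // and do nothing for targets where the section was never created.
    #include "ARMTargetObjectFile.h"
    #include "llvm/MC/MCStreamer.h"

    static void switchToAttributesSection(const llvm::ARMElfTargetObjectFile &TLOF,
                                          llvm::MCStreamer &Streamer) {
      if (const llvm::MCSection *Sec = TLOF.getAttributesSection())
        Streamer.SwitchSection(Sec);
    }
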
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index f859d1b..2428ce1 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -10,10 +10,6 @@
#include "ARM.h"
#include "ARMTargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -22,119 +18,135 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
#include <string>
#include <map>
using namespace llvm;
namespace {
-
- class ARMBaseAsmLexer : public TargetAsmLexer {
- const MCAsmInfo &AsmInfo;
-
- const AsmToken &lexDefinite() {
- return getLexer()->Lex();
- }
-
- AsmToken LexTokenUAL();
- protected:
- typedef std::map <std::string, unsigned> rmap_ty;
-
- rmap_ty RegisterMap;
-
- void InitRegisterMap(const TargetRegisterInfo *info) {
- unsigned numRegs = info->getNumRegs();
-
- for (unsigned i = 0; i < numRegs; ++i) {
- const char *regName = info->getName(i);
- if (regName)
- RegisterMap[regName] = i;
- }
- }
-
- unsigned MatchRegisterName(StringRef Name) {
- rmap_ty::iterator iter = RegisterMap.find(Name.str());
- if (iter != RegisterMap.end())
- return iter->second;
- else
- return 0;
- }
-
- AsmToken LexToken() {
- if (!Lexer) {
- SetError(SMLoc(), "No MCAsmLexer installed");
- return AsmToken(AsmToken::Error, "", 0);
- }
-
- switch (AsmInfo.getAssemblerDialect()) {
- default:
- SetError(SMLoc(), "Unhandled dialect");
- return AsmToken(AsmToken::Error, "", 0);
- case 0:
- return LexTokenUAL();
- }
- }
- public:
- ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : TargetAsmLexer(T), AsmInfo(MAI) {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
}
- };
-
- class ARMAsmLexer : public ARMBaseAsmLexer {
- public:
- ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("arm-unknown-unknown");
- std::string featureString;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
}
- };
-
- class ThumbAsmLexer : public ARMBaseAsmLexer {
- public:
- ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
- : ARMBaseAsmLexer(T, MAI) {
- std::string tripleString("thumb-unknown-unknown");
- std::string featureString;
- OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, featureString));
- InitRegisterMap(targetMachine->getRegisterInfo());
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
}
- };
-}
+ }
+public:
+ ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+ ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("arm-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+ ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : ARMBaseAsmLexer(T, MAI) {
+ std::string tripleString("thumb-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+};
+
+} // end anonymous namespace
AsmToken ARMBaseAsmLexer::LexTokenUAL() {
const AsmToken &lexedToken = lexDefinite();
-
+
switch (lexedToken.getKind()) {
- default:
- return AsmToken(lexedToken);
+ default: break;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
+ break;
+ case AsmToken::Identifier: {
std::string upperCase = lexedToken.getString().str();
std::string lowerCase = LowercaseString(upperCase);
StringRef lowerRef(lowerCase);
-
+
unsigned regID = MatchRegisterName(lowerRef);
-
- if (regID) {
+ // Check for register aliases.
+ // r13 -> sp
+ // r14 -> lr
+ // r15 -> pc
+ // ip -> r12
+ // FIXME: Some assemblers support lots of others. Do we want them all?
+ if (!regID) {
+ regID = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+
+ if (regID)
return AsmToken(AsmToken::Register,
lexedToken.getString(),
static_cast<int64_t>(regID));
- } else {
- return AsmToken(lexedToken);
- }
}
}
+
+ return AsmToken(lexedToken);
}
extern "C" void LLVMInitializeARMAsmLexer() {
RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
}
-
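
The FIXME in LexTokenUAL() above notes that other assemblers accept more register aliases than the four handled here (r13, r14, r15, ip). Purely as a hedged illustration of how that table could grow with the same StringSwitch idiom, the aliases below (sb, sl, fp) and the helper are assumptions and not part of this patch:

    // Illustration only: extra ARM register aliases some assemblers recognise,
    // resolved with the same StringSwitch pattern the lexer uses above.
    #include "ARM.h"                      // assumed to pull in the ARM::R* register enums
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    static unsigned matchExtraRegisterAlias(llvm::StringRef lowerCase) {
      return llvm::StringSwitch<unsigned>(lowerCase)
          .Case("sb", llvm::ARM::R9)      // static base
          .Case("sl", llvm::ARM::R10)     // stack limit
          .Case("fp", llvm::ARM::R11)     // ARM-state frame pointer
          .Default(0);                    // 0 = not an alias
    }
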
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 75e2a73..129af20 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,28 +8,28 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMSubtarget.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
-namespace {
-struct ARMOperand;
-
-// The shift types for register controlled shifts in arm memory addressing
+/// Shift types used for register controlled shifts in ARM memory addressing.
enum ShiftType {
Lsl,
Lsr,
@@ -38,24 +38,30 @@ enum ShiftType {
Rrx
};
+namespace {
+
+class ARMOperand;
+
class ARMAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
TargetMachine &TM;
-private:
MCAsmParser &getParser() const { return Parser; }
-
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool MaybeParseRegister(OwningPtr<ARMOperand> &Op, bool ParseWriteBack);
+ int TryParseRegister();
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+ const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant);
- bool ParseRegisterList(OwningPtr<ARMOperand> &Op);
-
- bool ParseMemory(OwningPtr<ARMOperand> &Op);
bool ParseMemoryOffsetReg(bool &Negative,
bool &OffsetRegShifted,
@@ -65,70 +71,76 @@ private:
bool &OffsetIsReg,
int &OffsetRegNum,
SMLoc &E);
-
bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
-
- bool ParseOperand(OwningPtr<ARMOperand> &Op);
-
bool ParseDirectiveWord(unsigned Size, SMLoc L);
-
bool ParseDirectiveThumb(SMLoc L);
-
bool ParseDirectiveThumbFunc(SMLoc L);
-
bool ParseDirectiveCode(SMLoc L);
-
bool ParseDirectiveSyntax(SMLoc L);
- bool MatchInstruction(SMLoc IDLoc,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst) {
- if (!MatchInstructionImpl(Operands, Inst))
- return false;
-
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
-
- return true;
- }
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+ void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode);
/// @name Auto-generated Match Functions
/// {
- unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
-
- bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
- &Operands,
- MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"
/// }
+ OperandMatchResultTy tryParseCoprocNumOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseCoprocRegOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemBarrierOptOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseProcIFlagsOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMSRMaskOperand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
public:
ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
- : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<ARMSubtarget>()));
+ }
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+} // end anonymous namespace
+
+namespace {
+
/// ARMOperand - Instances of this class represent a parsed ARM machine
/// instruction operand.
-struct ARMOperand : public MCParsedAsmOperand {
-private:
- ARMOperand() {}
-public:
+class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
CondCode,
+ CCOut,
+ CoprocNum,
+ CoprocReg,
Immediate,
+ MemBarrierOpt,
Memory,
+ MSRMask,
+ ProcIFlags,
Register,
+ RegisterList,
+ DPRRegisterList,
+ SPRRegisterList,
Token
} Kind;
SMLoc StartLoc, EndLoc;
+ SmallVector<unsigned, 8> Registers;
union {
struct {
@@ -136,40 +148,54 @@ public:
} CC;
struct {
+ ARM_MB::MemBOpt Val;
+ } MBOpt;
+
+ struct {
+ unsigned Val;
+ } Cop;
+
+ struct {
+ ARM_PROC::IFlags Val;
+ } IFlags;
+
+ struct {
+ unsigned Val;
+ } MMask;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
struct {
unsigned RegNum;
- bool Writeback;
} Reg;
struct {
const MCExpr *Val;
} Imm;
-
- // This is for all forms of ARM address expressions
+
+ /// Combined record for all forms of ARM address expressions.
struct {
unsigned BaseRegNum;
- unsigned OffsetRegNum; // used when OffsetIsReg is true
- const MCExpr *Offset; // used when OffsetIsReg is false
- const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
- enum ShiftType ShiftType; // used when OffsetRegShifted is true
- unsigned
- OffsetRegShifted : 1, // only used when OffsetIsReg is true
- Preindexed : 1,
- Postindexed : 1,
- OffsetIsReg : 1,
- Negative : 1, // only used when OffsetIsReg is true
- Writeback : 1;
+ union {
+ unsigned RegNum; ///< Offset register num, when OffsetIsReg.
+ const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+ } Offset;
+ const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
+ enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+ unsigned Preindexed : 1;
+ unsigned Postindexed : 1;
+ unsigned OffsetIsReg : 1;
+ unsigned Negative : 1; // only used when OffsetIsReg is true
+ unsigned Writeback : 1;
} Mem;
-
};
-
- //ARMOperand(KindTy K, SMLoc S, SMLoc E)
- // : Kind(K), StartLoc(S), EndLoc(E) {}
-
+
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -181,18 +207,36 @@ public:
case Token:
Tok = o.Tok;
break;
+ case CCOut:
case Register:
Reg = o.Reg;
break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList:
+ Registers = o.Registers;
+ break;
+ case CoprocNum:
+ case CoprocReg:
+ Cop = o.Cop;
+ break;
case Immediate:
Imm = o.Imm;
break;
+ case MemBarrierOpt:
+ MBOpt = o.MBOpt;
+ break;
case Memory:
Mem = o.Mem;
break;
+ case MSRMask:
+ MMask = o.MMask;
+ break;
+ case ProcIFlags:
+ IFlags = o.IFlags;
}
}
-
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -203,32 +247,129 @@ public:
return CC.Val;
}
+ unsigned getCoproc() const {
+ assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!");
+ return Cop.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
unsigned getReg() const {
- assert(Kind == Register && "Invalid access!");
+ assert((Kind == Register || Kind == CCOut) && "Invalid access!");
return Reg.RegNum;
}
+ const SmallVectorImpl<unsigned> &getRegList() const {
+ assert((Kind == RegisterList || Kind == DPRRegisterList ||
+ Kind == SPRRegisterList) && "Invalid access!");
+ return Registers;
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid access!");
return Imm.Val;
}
- bool isCondCode() const { return Kind == CondCode; }
+ ARM_MB::MemBOpt getMemBarrierOpt() const {
+ assert(Kind == MemBarrierOpt && "Invalid access!");
+ return MBOpt.Val;
+ }
- bool isImm() const { return Kind == Immediate; }
+ ARM_PROC::IFlags getProcIFlags() const {
+ assert(Kind == ProcIFlags && "Invalid access!");
+ return IFlags.Val;
+ }
+
+ unsigned getMSRMask() const {
+ assert(Kind == MSRMask && "Invalid access!");
+ return MMask.Val;
+ }
+
+ /// @name Memory Operand Accessors
+ /// @{
+
+ unsigned getMemBaseRegNum() const {
+ return Mem.BaseRegNum;
+ }
+ unsigned getMemOffsetRegNum() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.RegNum;
+ }
+ const MCExpr *getMemOffset() const {
+ assert(!Mem.OffsetIsReg && "Invalid access!");
+ return Mem.Offset.Value;
+ }
+ unsigned getMemOffsetRegShifted() const {
+ assert(Mem.OffsetIsReg && "Invalid access!");
+ return Mem.OffsetRegShifted;
+ }
+ const MCExpr *getMemShiftAmount() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftAmount;
+ }
+ enum ShiftType getMemShiftType() const {
+ assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
+ return Mem.ShiftType;
+ }
+ bool getMemPreindexed() const { return Mem.Preindexed; }
+ bool getMemPostindexed() const { return Mem.Postindexed; }
+ bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; }
+ bool getMemNegative() const { return Mem.Negative; }
+ bool getMemWriteback() const { return Mem.Writeback; }
+
+ /// @}
+ bool isCoprocNum() const { return Kind == CoprocNum; }
+ bool isCoprocReg() const { return Kind == CoprocReg; }
+ bool isCondCode() const { return Kind == CondCode; }
+ bool isCCOut() const { return Kind == CCOut; }
+ bool isImm() const { return Kind == Immediate; }
bool isReg() const { return Kind == Register; }
+ bool isRegList() const { return Kind == RegisterList; }
+ bool isDPRRegList() const { return Kind == DPRRegisterList; }
+ bool isSPRRegList() const { return Kind == SPRRegisterList; }
+ bool isToken() const { return Kind == Token; }
+ bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
+ bool isMemory() const { return Kind == Memory; }
+ bool isMemMode5() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
+ getMemNegative())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ // The offset must be a multiple of 4 in the range 0-1020.
+ int64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
+ }
+ bool isMemModeRegThumb() const {
+ if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
+ return false;
+ return true;
+ }
+ bool isMemModeImmThumb() const {
+ if (!isMemory() || getMemOffsetIsReg() || getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
- bool isToken() const {return Kind == Token; }
+ // The offset must be a multiple of 4 in the range 0-124.
+ uint64_t Value = CE->getValue();
+ return ((Value & 0x3) == 0 && Value <= 124);
+ }
+ bool isMSRMask() const { return Kind == MSRMask; }
+ bool isProcIFlags() const { return Kind == ProcIFlags; }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
- // Add as immediates when possible.
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ // Add as immediates when possible. A null MCExpr is treated as 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
else
Inst.addOperand(MCOperand::CreateExpr(Expr));
@@ -237,8 +378,23 @@ public:
void addCondCodeOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
- // FIXME: What belongs here?
- Inst.addOperand(MCOperand::CreateReg(0));
+ unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR;
+ Inst.addOperand(MCOperand::CreateReg(RegNum));
+ }
+
+ void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCoprocRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCoproc()));
+ }
+
+ void addCCOutOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
}
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -246,66 +402,181 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::CreateReg(*I));
+ }
+
+ void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addSPRRegListOperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+ void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
+ }
+
+ void addMemMode5Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemMode5() && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ assert(!getMemOffsetIsReg() && "Invalid mode 5 operand");
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 5 offset operand!");
+
+ // The MCInst offset operand doesn't include the low two bits (like
+ // the instruction encoding).
+ int64_t Offset = CE->getValue() / 4;
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add,
+ Offset)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub,
+ -Offset)));
+ }
+
+ void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+ }
+
+ void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode offset operand!");
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ }
+
+ void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask())));
+ }
+
+ void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
+ }
+
virtual void dump(raw_ostream &OS) const;
- static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
- SMLoc S) {
- Op.reset(new ARMOperand);
- Op->Kind = CondCode;
+ static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocNum);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
- static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
- SMLoc S) {
- Op.reset(new ARMOperand);
- Op->Kind = Token;
+ static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CoprocReg);
+ Op->Cop.Val = CopVal;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(CCOut);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
+ return Op;
}
- static void CreateReg(OwningPtr<ARMOperand> &Op, unsigned RegNum,
- bool Writeback, SMLoc S, SMLoc E) {
- Op.reset(new ARMOperand);
- Op->Kind = Register;
+ static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Register);
Op->Reg.RegNum = RegNum;
- Op->Reg.Writeback = Writeback;
-
Op->StartLoc = S;
Op->EndLoc = E;
+ return Op;
}
- static void CreateImm(OwningPtr<ARMOperand> &Op, const MCExpr *Val,
- SMLoc S, SMLoc E) {
- Op.reset(new ARMOperand);
- Op->Kind = Immediate;
+ static ARMOperand *
+ CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ KindTy Kind = RegisterList;
+
+ if (ARM::DPRRegClass.contains(Regs.front().first))
+ Kind = DPRRegisterList;
+ else if (ARM::SPRRegClass.contains(Regs.front().first))
+ Kind = SPRRegisterList;
+
+ ARMOperand *Op = new ARMOperand(Kind);
+ for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ I = Regs.begin(), E = Regs.end(); I != E; ++I)
+ Op->Registers.push_back(I->first);
+ array_pod_sort(Op->Registers.begin(), Op->Registers.end());
+ Op->StartLoc = StartLoc;
+ Op->EndLoc = EndLoc;
+ return Op;
+ }
+
+ static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Immediate);
Op->Imm.Val = Val;
-
Op->StartLoc = S;
Op->EndLoc = E;
+ return Op;
}
- static void CreateMem(OwningPtr<ARMOperand> &Op,
- unsigned BaseRegNum, bool OffsetIsReg,
- const MCExpr *Offset, unsigned OffsetRegNum,
- bool OffsetRegShifted, enum ShiftType ShiftType,
- const MCExpr *ShiftAmount, bool Preindexed,
- bool Postindexed, bool Negative, bool Writeback,
- SMLoc S, SMLoc E) {
- Op.reset(new ARMOperand);
- Op->Kind = Memory;
+ static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
+ const MCExpr *Offset, int OffsetRegNum,
+ bool OffsetRegShifted, enum ShiftType ShiftType,
+ const MCExpr *ShiftAmount, bool Preindexed,
+ bool Postindexed, bool Negative, bool Writeback,
+ SMLoc S, SMLoc E) {
+ assert((OffsetRegNum == -1 || OffsetIsReg) &&
+ "OffsetRegNum must imply OffsetIsReg!");
+ assert((!OffsetRegShifted || OffsetIsReg) &&
+ "OffsetRegShifted must imply OffsetIsReg!");
+ assert((Offset || OffsetIsReg) &&
+ "Offset must exist unless register offset is used!");
+ assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) &&
+ "Cannot have shift amount without shifted register offset!");
+ assert((!Offset || !OffsetIsReg) &&
+ "Cannot have expression offset and register offset!");
+
+ ARMOperand *Op = new ARMOperand(Memory);
Op->Mem.BaseRegNum = BaseRegNum;
Op->Mem.OffsetIsReg = OffsetIsReg;
- Op->Mem.Offset = Offset;
- Op->Mem.OffsetRegNum = OffsetRegNum;
+ if (OffsetIsReg)
+ Op->Mem.Offset.RegNum = OffsetRegNum;
+ else
+ Op->Mem.Offset.Value = Offset;
Op->Mem.OffsetRegShifted = OffsetRegShifted;
Op->Mem.ShiftType = ShiftType;
Op->Mem.ShiftAmount = ShiftAmount;
@@ -313,9 +584,34 @@ public:
Op->Mem.Postindexed = Postindexed;
Op->Mem.Negative = Negative;
Op->Mem.Writeback = Writeback;
-
+
Op->StartLoc = S;
Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MemBarrierOpt);
+ Op->MBOpt.Val = Opt;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(ProcIFlags);
+ Op->IFlags.Val = IFlags;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
+ ARMOperand *Op = new ARMOperand(MSRMask);
+ Op->MMask.Val = MMask;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
};
@@ -324,17 +620,77 @@ public:
void ARMOperand::dump(raw_ostream &OS) const {
switch (Kind) {
case CondCode:
- OS << ARMCondCodeToString(getCondCode());
+ OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
+ break;
+ case CCOut:
+ OS << "<ccout " << getReg() << ">";
+ break;
+ case CoprocNum:
+ OS << "<coprocessor number: " << getCoproc() << ">";
+ break;
+ case CoprocReg:
+ OS << "<coprocessor register: " << getCoproc() << ">";
+ break;
+ case MSRMask:
+ OS << "<mask: " << getMSRMask() << ">";
break;
case Immediate:
getImm()->print(OS);
break;
+ case MemBarrierOpt:
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ break;
case Memory:
- OS << "<memory>";
+ OS << "<memory "
+ << "base:" << getMemBaseRegNum();
+ if (getMemOffsetIsReg()) {
+ OS << " offset:<register " << getMemOffsetRegNum();
+ if (getMemOffsetRegShifted()) {
+ OS << " offset-shift-type:" << getMemShiftType();
+ OS << " offset-shift-amount:" << *getMemShiftAmount();
+ }
+ } else {
+ OS << " offset:" << *getMemOffset();
+ }
+ if (getMemOffsetIsReg())
+ OS << " (offset-is-reg)";
+ if (getMemPreindexed())
+ OS << " (pre-indexed)";
+ if (getMemPostindexed())
+ OS << " (post-indexed)";
+ if (getMemNegative())
+ OS << " (negative)";
+ if (getMemWriteback())
+ OS << " (writeback)";
+ OS << ">";
+ break;
+ case ProcIFlags: {
+ OS << "<ARM_PROC::";
+ unsigned IFlags = getProcIFlags();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ OS << ARM_PROC::IFlagsToString(1 << i);
+ OS << ">";
break;
+ }
case Register:
OS << "<register " << getReg() << ">";
break;
+ case RegisterList:
+ case DPRRegisterList:
+ case SPRRegisterList: {
+ OS << "<register_list ";
+
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ) {
+ OS << *I;
+ if (++I < E) OS << ", ";
+ }
+
+ OS << ">";
+ break;
+ }
case Token:
OS << "'" << getToken() << "'";
break;
@@ -348,184 +704,456 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
+bool ARMAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ RegNo = TryParseRegister();
+
+ return (RegNo == (unsigned)-1);
+}
+
/// Try to parse a register name. The token must be an Identifier when called,
-/// and if it is a register name a Reg operand is created, the token is eaten
-/// and false is returned. Else true is returned and no token is eaten.
-/// TODO this is likely to change to allow different register types and or to
-/// parse for a specific register type.
-bool ARMAsmParser::MaybeParseRegister
- (OwningPtr<ARMOperand> &Op, bool ParseWriteBack) {
- SMLoc S, E;
+/// and if it is a register name the token is eaten and the register number is
+/// returned. Otherwise return -1.
+///
+int ARMAsmParser::TryParseRegister() {
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
- int RegNum;
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ if (!RegNum) {
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("r13", ARM::SP)
+ .Case("r14", ARM::LR)
+ .Case("r15", ARM::PC)
+ .Case("ip", ARM::R12)
+ .Default(0);
+ }
+ if (!RegNum) return -1;
- RegNum = MatchRegisterName(Tok.getString());
- if (RegNum == -1)
- return true;
-
- S = Tok.getLoc();
-
Parser.Lex(); // Eat identifier token.
-
- E = Parser.getTok().getLoc();
+ return RegNum;
+}
- bool Writeback = false;
- if (ParseWriteBack) {
- const AsmToken &ExclaimTok = Parser.getTok();
- if (ExclaimTok.is(AsmToken::Exclaim)) {
- E = ExclaimTok.getLoc();
- Writeback = true;
- Parser.Lex(); // Eat exclaim token
+/// Try to parse a register name. The token must be an Identifier when called.
+/// If it's a register, an AsmOperand is created. Another AsmOperand is created
+/// if there is a "writeback". Returns 'true' if it's not a register.
+///
+/// TODO this is likely to change to allow different register types and or to
+/// parse for a specific register type.
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int RegNo = TryParseRegister();
+ if (RegNo == -1)
+ return true;
+
+ Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+ const AsmToken &ExclaimTok = Parser.getTok();
+ if (ExclaimTok.is(AsmToken::Exclaim)) {
+ Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc()));
+ Parser.Lex(); // Eat exclaim token
+ }
+
+ return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse a coprocessor-related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+ // Use the same layout as the tablegen'erated register name matcher. Ugly,
+ // but efficient.
+ switch (Name.size()) {
+ default: break;
+ case 2:
+ if (Name[0] != CoprocOp)
+ return -1;
+ switch (Name[1]) {
+ default: return -1;
+ case '0': return 0;
+ case '1': return 1;
+ case '2': return 2;
+ case '3': return 3;
+ case '4': return 4;
+ case '5': return 5;
+ case '6': return 6;
+ case '7': return 7;
+ case '8': return 8;
+ case '9': return 9;
+ }
+ break;
+ case 3:
+ if (Name[0] != CoprocOp || Name[1] != '1')
+ return -1;
+ switch (Name[2]) {
+ default: return -1;
+ case '0': return 10;
+ case '1': return 11;
+ case '2': return 12;
+ case '3': return 13;
+ case '4': return 14;
+ case '5': return 15;
}
+ break;
}
- ARMOperand::CreateReg(Op, RegNum, Writeback, S, E);
+ return -1;
+}
- return false;
+/// tryParseCoprocNumOperand - Try to parse a coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ if (Num == -1)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+ return MatchOperand_Success;
}
-/// Parse a register list, return false if successful else return true or an
-/// error. The first token must be a '{' when called.
-bool ARMAsmParser::ParseRegisterList(OwningPtr<ARMOperand> &Op) {
- SMLoc S, E;
- assert(Parser.getTok().is(AsmToken::LCurly) &&
- "Token is not an Left Curly Brace");
- S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat left curly brace token.
-
- const AsmToken &RegTok = Parser.getTok();
- SMLoc RegLoc = RegTok.getLoc();
- if (RegTok.isNot(AsmToken::Identifier))
- return Error(RegLoc, "register expected");
- int RegNum = MatchRegisterName(RegTok.getString());
- if (RegNum == -1)
- return Error(RegLoc, "register expected");
+/// tryParseCoprocRegOperand - Try to parse a coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// register, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+
Parser.Lex(); // Eat identifier token.
- unsigned RegList = 1 << RegNum;
+ Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+ return MatchOperand_Success;
+}
- int HighRegNum = RegNum;
- // TODO ranges like "{Rn-Rm}"
- while (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat comma token.
+/// Parse a register list and add it to the operand list; return true on error.
+/// The first token must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) &&
+ "Token is not a Left Curly Brace");
+ SMLoc S = Parser.getTok().getLoc();
+
+ // Read the rest of the registers in the list.
+ unsigned PrevRegNum = 0;
+ SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+ do {
+ bool IsRange = Parser.getTok().is(AsmToken::Minus);
+ Parser.Lex(); // Eat non-identifier token.
const AsmToken &RegTok = Parser.getTok();
SMLoc RegLoc = RegTok.getLoc();
- if (RegTok.isNot(AsmToken::Identifier))
- return Error(RegLoc, "register expected");
- int RegNum = MatchRegisterName(RegTok.getString());
- if (RegNum == -1)
- return Error(RegLoc, "register expected");
+ if (RegTok.isNot(AsmToken::Identifier)) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
- if (RegList & (1 << RegNum))
- Warning(RegLoc, "register duplicated in register list");
- else if (RegNum <= HighRegNum)
- Warning(RegLoc, "register not in ascending order in register list");
- RegList |= 1 << RegNum;
- HighRegNum = RegNum;
+ int RegNum = TryParseRegister();
+ if (RegNum == -1) {
+ Error(RegLoc, "register expected");
+ return true;
+ }
- Parser.Lex(); // Eat identifier token.
- }
+ if (IsRange) {
+ int Reg = PrevRegNum;
+ do {
+ ++Reg;
+ Registers.push_back(std::make_pair(Reg, RegLoc));
+ } while (Reg != RegNum);
+ } else {
+ Registers.push_back(std::make_pair(RegNum, RegLoc));
+ }
+
+ PrevRegNum = RegNum;
+ } while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus));
+
+ // Process the right curly brace of the list.
const AsmToken &RCurlyTok = Parser.getTok();
- if (RCurlyTok.isNot(AsmToken::RCurly))
- return Error(RCurlyTok.getLoc(), "'}' expected");
- E = RCurlyTok.getLoc();
- Parser.Lex(); // Eat left curly brace token.
+ if (RCurlyTok.isNot(AsmToken::RCurly)) {
+ Error(RCurlyTok.getLoc(), "'}' expected");
+ return true;
+ }
+
+ SMLoc E = RCurlyTok.getLoc();
+ Parser.Lex(); // Eat right curly brace token.
+
+ // Verify the register list.
+ SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator
+ RI = Registers.begin(), RE = Registers.end();
+
+ unsigned HighRegNum = getARMRegisterNumbering(RI->first);
+ bool EmittedWarning = false;
+
+ DenseMap<unsigned, bool> RegMap;
+ RegMap[HighRegNum] = true;
+ for (++RI; RI != RE; ++RI) {
+ const std::pair<unsigned, SMLoc> &RegInfo = *RI;
+ unsigned Reg = getARMRegisterNumbering(RegInfo.first);
+
+ if (RegMap[Reg]) {
+ Error(RegInfo.second, "register duplicated in register list");
+ return true;
+ }
+
+ if (!EmittedWarning && Reg < HighRegNum)
+ Warning(RegInfo.second,
+ "register not in ascending order in register list");
+
+ RegMap[Reg] = true;
+ HighRegNum = std::max(Reg, HighRegNum);
+ }
+
+ Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
return false;
}
-/// Parse an arm memory expression, return false if successful else return true
+/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef OptStr = Tok.getString();
+
+ unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+ .Case("sy", ARM_MB::SY)
+ .Case("st", ARM_MB::ST)
+ .Case("ish", ARM_MB::ISH)
+ .Case("ishst", ARM_MB::ISHST)
+ .Case("nsh", ARM_MB::NSH)
+ .Case("nshst", ARM_MB::NSHST)
+ .Case("osh", ARM_MB::OSH)
+ .Case("oshst", ARM_MB::OSHST)
+ .Default(~0U);
+
+ if (Opt == ~0U)
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseProcIFlagsOperand - Try to parse iflags from a CPS instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef IFlagsStr = Tok.getString();
+
+ unsigned IFlags = 0;
+ for (int i = 0, e = IFlagsStr.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1))
+ .Case("a", ARM_PROC::A)
+ .Case("i", ARM_PROC::I)
+ .Case("f", ARM_PROC::F)
+ .Default(~0U);
+
+ // If some specific iflag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (Flag == ~0U || (IFlags & Flag))
+ return MatchOperand_NoMatch;
+
+ IFlags |= Flag;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
+ return MatchOperand_Success;
+}
+
+/// tryParseMSRMaskOperand - Try to parse mask flags from an MSR instruction.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ StringRef Mask = Tok.getString();
+
+ // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+ size_t Start = 0, Next = Mask.find('_');
+ StringRef Flags = "";
+ StringRef SpecReg = Mask.slice(Start, Next);
+ if (Next != StringRef::npos)
+ Flags = Mask.slice(Next+1, Mask.size());
+
+ // FlagsVal contains the complete mask:
+ // 3-0: Mask
+ // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ unsigned FlagsVal = 0;
+
+ if (SpecReg == "apsr") {
+ FlagsVal = StringSwitch<unsigned>(Flags)
+ .Case("nzcvq", 0x8) // same as CPSR_c
+ .Case("g", 0x4) // same as CPSR_s
+ .Case("nzcvqg", 0xc) // same as CPSR_fs
+ .Default(~0U);
+
+ if (FlagsVal == ~0U) {
+ if (!Flags.empty())
+ return MatchOperand_NoMatch;
+ else
+ FlagsVal = 0; // No flag
+ }
+ } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+ for (int i = 0, e = Flags.size(); i != e; ++i) {
+ unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+ .Case("c", 1)
+ .Case("x", 2)
+ .Case("s", 4)
+ .Case("f", 8)
+ .Default(~0U);
+
+ // If some specific flag is already set, it means that some letter is
+ // present more than once, which is not acceptable.
+ if (FlagsVal == ~0U || (FlagsVal & Flag))
+ return MatchOperand_NoMatch;
+ FlagsVal |= Flag;
+ }
+ } else // No match for special register.
+ return MatchOperand_NoMatch;
+
+ // A special register without flags is equivalent to the "fc" flags.
+ if (!FlagsVal)
+ FlagsVal = 0x9;
+
+ // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+ if (SpecReg == "spsr")
+ FlagsVal |= 16;
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+ return MatchOperand_Success;
+}
+
+/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
+///
/// TODO Only preindexing and postindexing addressing are started, unindexed
/// with option, etc are still to do.
-bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
- "Token is not an Left Bracket");
+ "Token is not a Left Bracket");
S = Parser.getTok().getLoc();
Parser.Lex(); // Eat left bracket token.
const AsmToken &BaseRegTok = Parser.getTok();
- if (BaseRegTok.isNot(AsmToken::Identifier))
- return Error(BaseRegTok.getLoc(), "register expected");
- if (MaybeParseRegister(Op, false))
- return Error(BaseRegTok.getLoc(), "register expected");
- int BaseRegNum = Op->getReg();
+ if (BaseRegTok.isNot(AsmToken::Identifier)) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+ int BaseRegNum = TryParseRegister();
+ if (BaseRegNum == -1) {
+ Error(BaseRegTok.getLoc(), "register expected");
+ return true;
+ }
+
+ // The next token must either be a comma or a closing bracket.
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac))
+ return true;
bool Preindexed = false;
bool Postindexed = false;
bool OffsetIsReg = false;
bool Negative = false;
bool Writeback = false;
+ ARMOperand *WBOp = 0;
+ int OffsetRegNum = -1;
+ bool OffsetRegShifted = false;
+ enum ShiftType ShiftType = Lsl;
+ const MCExpr *ShiftAmount = 0;
+ const MCExpr *Offset = 0;
// First look for preindexed address forms, that is after the "[Rn" we now
// have to see if the next token is a comma.
- const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Comma)) {
Preindexed = true;
Parser.Lex(); // Eat comma token.
- int OffsetRegNum;
- bool OffsetRegShifted;
- enum ShiftType ShiftType;
- const MCExpr *ShiftAmount;
- const MCExpr *Offset;
- if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
- Offset, OffsetIsReg, OffsetRegNum, E))
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount,
+ Offset, OffsetIsReg, OffsetRegNum, E))
return true;
const AsmToken &RBracTok = Parser.getTok();
- if (RBracTok.isNot(AsmToken::RBrac))
- return Error(RBracTok.getLoc(), "']' expected");
+ if (RBracTok.isNot(AsmToken::RBrac)) {
+ Error(RBracTok.getLoc(), "']' expected");
+ return true;
+ }
E = RBracTok.getLoc();
Parser.Lex(); // Eat right bracket token.
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
- E = ExclaimTok.getLoc();
+ WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
+ ExclaimTok.getLoc());
Writeback = true;
Parser.Lex(); // Eat exclaim token
}
- ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
- OffsetRegShifted, ShiftType, ShiftAmount,
- Preindexed, Postindexed, Negative, Writeback, S, E);
- return false;
- }
- // The "[Rn" we have so far was not followed by a comma.
- else if (Tok.is(AsmToken::RBrac)) {
- // This is a post indexing addressing forms, that is a ']' follows after
- // the "[Rn".
- Postindexed = true;
- Writeback = true;
+ } else {
+ // The "[Rn" we have so far was not followed by a comma.
+
+ // If there's anything other than the right brace, this is a post indexing
+ // addressing form.
E = Tok.getLoc();
Parser.Lex(); // Eat right bracket token.
- int OffsetRegNum = 0;
- bool OffsetRegShifted = false;
- enum ShiftType ShiftType;
- const MCExpr *ShiftAmount;
- const MCExpr *Offset;
-
const AsmToken &NextTok = Parser.getTok();
+
if (NextTok.isNot(AsmToken::EndOfStatement)) {
- if (NextTok.isNot(AsmToken::Comma))
- return Error(NextTok.getLoc(), "',' expected");
+ Postindexed = true;
+ Writeback = true;
+
+ if (NextTok.isNot(AsmToken::Comma)) {
+ Error(NextTok.getLoc(), "',' expected");
+ return true;
+ }
+
Parser.Lex(); // Eat comma token.
- if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
- ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
- E))
+
+ if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
+ ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
+ E))
return true;
}
+ }
- ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum,
- OffsetRegShifted, ShiftType, ShiftAmount,
- Preindexed, Postindexed, Negative, Writeback, S, E);
- return false;
+ // Force Offset to exist if used.
+ if (!OffsetIsReg) {
+ if (!Offset)
+ Offset = MCConstantExpr::Create(0, getContext());
}
- return true;
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
+ OffsetRegNum, OffsetRegShifted,
+ ShiftType, ShiftAmount, Preindexed,
+ Postindexed, Negative, Writeback,
+ S, E));
+ if (WBOp)
+ Operands.push_back(WBOp);
+
+ return false;
}
/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn],"
@@ -543,7 +1171,6 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
bool &OffsetIsReg,
int &OffsetRegNum,
SMLoc &E) {
- OwningPtr<ARMOperand> Op;
Negative = false;
OffsetRegShifted = false;
OffsetIsReg = false;
@@ -559,13 +1186,15 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
// See if there is a register following the "[Rn," or "[Rn]," we have so far.
const AsmToken &OffsetRegTok = Parser.getTok();
if (OffsetRegTok.is(AsmToken::Identifier)) {
- OffsetIsReg = !MaybeParseRegister(Op, false);
- if (OffsetIsReg) {
- E = Op->getEndLoc();
- OffsetRegNum = Op->getReg();
+ SMLoc CurLoc = OffsetRegTok.getLoc();
+ OffsetRegNum = TryParseRegister();
+ if (OffsetRegNum != -1) {
+ OffsetIsReg = true;
+ E = CurLoc;
}
}
- // If we parsed a register as the offset then their can be a shift after that
+
+ // If we parsed a register as the offset then there can be a shift after that.
if (OffsetRegNum != -1) {
// Look for a comma then a shift
const AsmToken &Tok = Parser.getTok();
@@ -583,7 +1212,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
const AsmToken &HashTok = Parser.getTok();
if (HashTok.isNot(AsmToken::Hash))
return Error(HashTok.getLoc(), "'#' expected");
-
+
Parser.Lex(); // Eat hash token.
if (getParser().ParseExpression(Offset))
@@ -597,8 +1226,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
/// ( lsl | lsr | asr | ror ) , # shift_amount
/// rrx
/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType &St,
- const MCExpr *&ShiftAmount,
+bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
SMLoc &E) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -636,13 +1264,33 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
-bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
+bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
SMLoc S, E;
-
+
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
switch (getLexer().getKind()) {
+ default:
+ Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return true;
case AsmToken::Identifier:
- if (!MaybeParseRegister(Op, true))
+ if (!TryParseRegisterWithWriteBack(Operands))
return false;
+
+ // Fall through for the Identifier case that is not a register or a
+ // special name.
+ case AsmToken::Integer: // things like 1f and 2b as branch targets
+ case AsmToken::Dot: { // . as a branch target
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
const MCExpr *IdVal;
@@ -650,12 +1298,13 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
if (getParser().ParseExpression(IdVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- ARMOperand::CreateImm(Op, IdVal, S, E);
+ Operands.push_back(ARMOperand::CreateImm(IdVal, S, E));
return false;
+ }
case AsmToken::LBrac:
- return ParseMemory(Op);
+ return ParseMemory(Operands);
case AsmToken::LCurly:
- return ParseRegisterList(Op);
+ return ParseRegisterList(Operands);
case AsmToken::Hash:
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -665,28 +1314,134 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
if (getParser().ParseExpression(ImmVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- ARMOperand::CreateImm(Op, ImmVal, S, E);
+ Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
- default:
- return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ case AsmToken::Colon: {
+ // ":lower16:" and ":upper16:" expression prefixes
+ // FIXME: Check it's an expression prefix,
+ // e.g. (FOO - :lower16:BAR) isn't legal.
+ ARMMCExpr::VariantKind RefKind;
+ if (ParsePrefix(RefKind))
+ return true;
+
+ const MCExpr *SubExprVal;
+ if (getParser().ParseExpression(SubExprVal))
+ return true;
+
+ const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+ getContext());
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
+ return false;
+ }
}
}
-/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- OwningPtr<ARMOperand> Op;
+// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
+// :lower16: and :upper16:.
+bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) {
+ RefKind = ARMMCExpr::VK_ARM_None;
- // Create the leading tokens for the mnemonic, split by '.' characters.
- size_t Start = 0, Next = Name.find('.');
- StringRef Head = Name.slice(Start, Next);
+ // :lower16: and :upper16: modifiers
+ assert(getLexer().is(AsmToken::Colon) && "expected a :");
+ Parser.Lex(); // Eat ':'
+
+ if (getLexer().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "expected prefix identifier in operand");
+ return true;
+ }
+
+ StringRef IDVal = Parser.getTok().getIdentifier();
+ if (IDVal == "lower16") {
+ RefKind = ARMMCExpr::VK_ARM_LO16;
+ } else if (IDVal == "upper16") {
+ RefKind = ARMMCExpr::VK_ARM_HI16;
+ } else {
+ Error(Parser.getTok().getLoc(), "unexpected prefix in operand");
+ return true;
+ }
+ Parser.Lex();
+
+ if (getLexer().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "unexpected token after prefix");
+ return true;
+ }
+ Parser.Lex(); // Eat the last ':'
+ return false;
+}
+
+const MCExpr *
+ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant) {
+ // Recurse over the given expression, rebuilding it to apply the given variant
+ // to the leftmost symbol.
+ if (Variant == MCSymbolRefExpr::VK_None)
+ return E;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle target expr yet");
+ case MCExpr::Constant:
+ llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
- // Determine the predicate, if any.
+ if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+ return 0;
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+ }
+
+ case MCExpr::Unary:
+ llvm_unreachable("Can't handle unary expressions yet");
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+ const MCExpr *RHS = BE->getRHS();
+ if (!LHS)
+ return 0;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+ }
+ }
+
+ assert(0 && "Invalid expression kind!");
+ return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+ unsigned &PredicationCode,
+ bool &CarrySetting,
+ unsigned &ProcessorIMod) {
+ PredicationCode = ARMCC::AL;
+ CarrySetting = false;
+ ProcessorIMod = 0;
+
+ // Ignore some mnemonics we know aren't predicated forms.
//
- // FIXME: We need a way to check whether a prefix supports predication,
- // otherwise we will end up with an ambiguity for instructions that happen to
- // end with a predicate name.
- unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+ // FIXME: Would be nice to autogen this.
+ if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+ Mnemonic == "movs" ||
+ Mnemonic == "svc" ||
+ (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+ Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+ Mnemonic == "vacge" || Mnemonic == "vcge" ||
+ Mnemonic == "vclt" ||
+ Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+ Mnemonic == "vcle" ||
+ (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+ Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+ Mnemonic == "vqdmlal"))
+ return Mnemonic;
+
+ // First, split out any predication code.
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
.Case("eq", ARMCC::EQ)
.Case("ne", ARMCC::NE)
.Case("hs", ARMCC::HS)
@@ -704,44 +1459,268 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
.Case("al", ARMCC::AL)
.Default(~0U);
if (CC != ~0U) {
- Head = Head.slice(0, Head.size() - 2);
- } else
- CC = ARMCC::AL;
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
+
+ // Next, determine if we have a carry setting bit. We explicitly ignore all
+ // the instructions we know end in 's'.
+ if (Mnemonic.endswith("s") &&
+ !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+ Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+ Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+ Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+ Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+ CarrySetting = true;
+ }
+
+  // The "cps" instruction can have an interrupt mode operand which is glued
+  // into the mnemonic. Check if this is the case, split it and parse the imod
+  // operand.
+ if (Mnemonic.startswith("cps")) {
+ // Split out any imod code.
+ unsigned IMod =
+ StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2))
+ .Case("ie", ARM_PROC::IE)
+ .Case("id", ARM_PROC::ID)
+ .Default(~0U);
+ if (IMod != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2);
+ ProcessorIMod = IMod;
+ }
+ }
+
+ return Mnemonic;
+}
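+
+// A couple of worked examples for the splitting above (illustrative, derived
+// from the lists in the code): "addseq" becomes ("add", PredicationCode =
+// ARMCC::EQ, CarrySetting = true); "cpsie" becomes ("cps", ProcessorIMod =
+// ARM_PROC::IE); mnemonics on the exception lists, e.g. "teq" or "movs",
+// are returned untouched.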
+
+/// \brief Given a canonical mnemonic, determine if the instruction ever allows
+/// inclusion of carry set or predication code operands.
+//
+// FIXME: It would be nice to autogen this.
+void ARMAsmParser::
+GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb();
+
+ if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+ Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
+ Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
+ Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
+ Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mov" ||
+ Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
+ Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
+ Mnemonic == "eor" || Mnemonic == "smlal" || Mnemonic == "mvn") {
+ CanAcceptCarrySet = true;
+ } else {
+ CanAcceptCarrySet = false;
+ }
+
+ if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" ||
+ Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" ||
+ Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" ||
+ Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" ||
+ Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" ||
+ Mnemonic == "clrex" || Mnemonic.startswith("cps")) {
+ CanAcceptPredicationCode = false;
+ } else {
+ CanAcceptPredicationCode = true;
+ }
- ARMOperand::CreateToken(Op, Head, NameLoc);
- Operands.push_back(Op.take());
+ if (isThumb)
+ if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
+ CanAcceptPredicationCode = false;
+}
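+
+// For example, with the lists above "add" yields (CanAcceptCarrySet = true,
+// CanAcceptPredicationCode = true), "cmp" yields (false, true) and "cps"
+// yields (false, false); in Thumb mode, coprocessor mnemonics such as "mcr"
+// additionally lose predication.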
+
+/// Parse an arm instruction mnemonic followed by its operands.
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Split out the predication code and carry setting flag from the mnemonic.
+ unsigned PredicationCode;
+ unsigned ProcessorIMod;
+ bool CarrySetting;
+ Head = SplitMnemonic(Head, PredicationCode, CarrySetting,
+ ProcessorIMod);
+
+ Operands.push_back(ARMOperand::CreateToken(Head, NameLoc));
+
+ // Next, add the CCOut and ConditionCode operands, if needed.
+ //
+ // For mnemonics which can ever incorporate a carry setting bit or predication
+ // code, our matching model involves us always generating CCOut and
+ // ConditionCode operands to match the mnemonic "as written" and then we let
+ // the matcher deal with finding the right instruction or generating an
+ // appropriate error.
+ bool CanAcceptCarrySet, CanAcceptPredicationCode;
+ GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+
+ // Add the carry setting operand, if necessary.
+ //
+ // FIXME: It would be awesome if we could somehow invent a location such that
+ // match errors on this operand would print a nice diagnostic about how the
+ // 's' character in the mnemonic resulted in a CCOut operand.
+ if (CanAcceptCarrySet) {
+ Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
+ NameLoc));
+ } else {
+ // This mnemonic can't ever accept a carry set, but the user wrote one (or
+ // misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the predication code operand, if necessary.
+ if (CanAcceptPredicationCode) {
+ Operands.push_back(ARMOperand::CreateCondCode(
+ ARMCC::CondCodes(PredicationCode), NameLoc));
+ } else {
+ // This mnemonic can't ever accept a predication code, but the user wrote
+ // one (or misspelled another mnemonic).
+
+ // FIXME: Issue a nice error.
+ }
+
+ // Add the processor imod operand, if necessary.
+ if (ProcessorIMod) {
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(ProcessorIMod, getContext()),
+ NameLoc, NameLoc));
+ } else {
+    // This mnemonic can't ever accept an imod, but the user wrote
+ // one (or misspelled another mnemonic).
- ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
- Operands.push_back(Op.take());
+ // FIXME: Issue a nice error.
+ }
// Add the remaining tokens in the mnemonic.
while (Next != StringRef::npos) {
Start = Next;
Next = Name.find('.', Start + 1);
- Head = Name.slice(Start, Next);
+ StringRef ExtraToken = Name.slice(Start, Next);
- ARMOperand::CreateToken(Op, Head, NameLoc);
- Operands.push_back(Op.take());
+ Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
}
// Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- OwningPtr<ARMOperand> Op;
- if (ParseOperand(Op)) return true;
- Operands.push_back(Op.take());
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Op)) return true;
- Operands.push_back(Op.take());
+ if (ParseOperand(Operands, Head)) {
+ Parser.EatToEndOfStatement();
+ return true;
+ }
}
}
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return TokError("unexpected token in argument list");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
return false;
}
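+
+// To illustrate the operand layout produced above: for the source line
+// "addseq r0, r1, r2" the list is roughly [Token "add", CCOut ARM::CPSR,
+// CondCode ARMCC::EQ, <operands parsed for r0, r1, r2>], and the generated
+// matcher picks the concrete instruction from that shape.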
+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ unsigned ErrorInfo;
+ MatchResultTy MatchResult, MatchResult2;
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult != Match_Success) {
+ // If we get a Match_InvalidOperand it might be some arithmetic instruction
+ // that does not update the condition codes. So try adding a CCOut operand
+ // with a value of reg0.
+ if (MatchResult == Match_InvalidOperand) {
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(0,
+ ((ARMOperand*)Operands[0])->getStartLoc()));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ // If we get a Match_MnemonicFail it might be some arithmetic instruction
+ // that updates the condition codes if it ends in 's'. So see if the
+ // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+ // operand with a value of CPSR.
+    else if (MatchResult == Match_MnemonicFail) {
+ // Get the instruction mnemonic, which is the first token.
+ StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+ if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+        // Remove the 's' from the mnemonic for matching.
+ StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+ SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+ MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+ if (MatchResult2 == Match_Success)
+ MatchResult = Match_Success;
+ else {
+ ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+ Operands.erase(Operands.begin());
+ delete OldMnemonic;
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, NameLoc));
+ ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+ Operands.erase(Operands.begin() + 1);
+ delete CCOut;
+ }
+ }
+ }
+ }
+ switch (MatchResult) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
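+
+// Note on the retries above (a sketch of the intent, inferred from the code):
+// a Match_InvalidOperand failure is retried with a reg0 CCOut inserted at
+// index 1, in case the instruction is an arithmetic form that does not update
+// the condition codes; a Match_MnemonicFail on a mnemonic ending in 's' is
+// retried as the CPSR-writing form with the 's' stripped. Either way the
+// operand list is restored if the second attempt also fails.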
+
/// ParseDirective parses the arm specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
@@ -771,7 +1750,7 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
@@ -801,16 +1780,16 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
- return Error(L, "unexpected token in .syntax directive");
- StringRef ATTRIBUTE_UNUSED SymbolName = Parser.getTok().getIdentifier();
+ return Error(L, "unexpected token in .thumb_func directive");
+ StringRef Name = Tok.getString();
Parser.Lex(); // Consume the identifier token.
-
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: mark symbol as a thumb symbol
- // getParser().getStreamer().Emit???();
+ // Mark symbol as a thumb symbol.
+ MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+ getParser().getStreamer().EmitThumbFunc(Func);
return false;
}
@@ -824,7 +1803,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
if (Mode == "unified" || Mode == "UNIFIED")
Parser.Lex();
else if (Mode == "divided" || Mode == "DIVIDED")
- Parser.Lex();
+    return Error(L, "'.syntax divided' arm assembly not supported");
else
return Error(L, "unrecognized syntax mode in .syntax directive");
@@ -855,8 +1834,21 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
return Error(Parser.getTok().getLoc(), "unexpected token in directive");
Parser.Lex();
- // TODO tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+  // FIXME: We need to be able to switch subtargets at this point so that
+  // MatchInstructionImpl() gets the right AvailableFeatures (with or without
+  // Feature_IsThumb) and matches the right instructions. This is blocked on
+  // the FIXME in llvm-mc.cpp when creating the TargetMachine.
+  if (Val == 16) {
+    assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
+           "switching between arm/thumb not yet supported via .code 16");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+  } else {
+    assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
+           "switching between thumb/arm not yet supported via .code 32");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+  }
+
return false;
}
@@ -869,4 +1861,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
LLVMInitializeARMAsmLexer();
}
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
#include "ARMGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e220289..78d73d3 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -39,9 +39,9 @@
/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
/// function for a Thumb instruction.
///
-#include "../ARMGenDecoderTables.inc"
+#include "ARMGenDecoderTables.inc"
-#include "../ARMGenEDInfo.inc"
+#include "ARMGenEDInfo.inc"
using namespace llvm;
@@ -89,7 +89,8 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
return ARM::BFI;
}
- // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+ // A1 & A2.
 // As a result, the decoder fails to decode USAT properly.
if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
return ARM::USAT;
@@ -252,9 +253,6 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
default:
return Opcode; // Return unmorphed opcode.
- case ARM::t2LDRDi8:
- return ARM::t2LDRDpci;
-
case ARM::t2LDR_POST: case ARM::t2LDR_PRE:
case ARM::t2LDRi12: case ARM::t2LDRi8:
case ARM::t2LDRs: case ARM::t2LDRT:
@@ -349,36 +347,6 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
return decodeThumbInstruction(insn);
}
-static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::t2PLDi12: case ARM::t2PLDi8:
- case ARM::t2PLDr: case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWr: case ARM::t2PLDWs:
- case ARM::t2PLIi12: case ARM::t2PLIi8:
- case ARM::t2PLIr: case ARM::t2PLIs:
- return true;
- }
-}
-
-static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
- switch (Opcode) {
- default:
- return 0;
- case ARM::t2PLDi12: case ARM::t2PLDi8:
- case ARM::t2PLDr: case ARM::t2PLDs:
- return ARM::t2PLDpci;
- case ARM::t2PLDWi12: case ARM::t2PLDWi8:
- case ARM::t2PLDWr: case ARM::t2PLDWs:
- return ARM::t2PLDWpci;
- case ARM::t2PLIi12: case ARM::t2PLIi8:
- case ARM::t2PLIr: case ARM::t2PLIs:
- return ARM::t2PLIpci;
- }
-}
-
//
// Public interface for the disassembler
//
@@ -485,11 +453,6 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
// instructions as well.
unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
- // A8.6.117/119/120/121.
- // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant.
- if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
- Opcode = T2Morph2Preload2PCI(Opcode);
-
ARMFormat Format = ARMFormats[Opcode];
Size = IsThumb2 ? 4 : 2;
@@ -568,9 +531,9 @@ static MCDisassembler *createThumbDisassembler(const Target &T) {
return new ThumbDisassembler;
}
-extern "C" void LLVMInitializeARMDisassembler() {
+extern "C" void LLVMInitializeARMDisassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheARMTarget,
+ TargetRegistry::RegisterMCDisassembler(TheARMTarget,
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
createThumbDisassembler);
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index 9f493b9..bac68dd 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -79,22 +79,9 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
}
// Return the register enum Based on RegClass and the raw register number.
-// For DRegPair, see comments below.
// FIXME: Auto-gened?
-static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
- bool DRegPair = false) {
-
- if (DRegPair && RegClassID == ARM::QPRRegClassID) {
- // LLVM expects { Dd, Dd+1 } to form a super register; this is not specified
- // in the ARM Architecture Manual as far as I understand it (A8.6.307).
- // Therefore, we morph the RegClassID to be the sub register class and don't
- // subsequently transform the RawRegister encoding when calculating RegNum.
- //
- // See also ARMinstPrinter::printOperand() wrt "dregpair" modifier part
- // where this workaround is meant for.
- RegClassID = ARM::DPRRegClassID;
- }
-
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
// For this purpose, we can treat rGPR as if it were GPR.
if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
@@ -704,8 +691,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
// MSR/MSRsys: Rm mask=Inst{19-16}
// BXJ: Rm
// MSRi/MSRsysi: so_imm
-// SRSW/SRS: addrmode4:$addr mode_imm
-// RFEW/RFE: addrmode4:$addr Rn
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -733,35 +720,34 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
NumOpsAdded = 1;
return true;
}
- // MSR and MSRsys take one GPR reg Rm, followed by the mask.
- if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) {
- assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+  // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSR) {
+ assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
NumOpsAdded = 2;
return true;
}
- // MSRi and MSRsysi take one so_imm operand, followed by the mask.
- if (Opcode == ARM::MSRi || Opcode == ARM::MSRsysi) {
+  // MSRi takes a mask, followed by one so_imm operand. The mask contains the
+ // R Bit in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::MSRi) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+ slice(insn, 19, 16) /* Special Reg */ ));
// SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
// A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
// See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
unsigned Imm = insn & 0xFF;
MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
NumOpsAdded = 2;
return true;
}
- // SRSW and SRS requires addrmode4:$addr for ${addr:submode}, followed by the
- // mode immediate (Inst{4-0}).
if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
Opcode == ARM::RFEW || Opcode == ARM::RFE) {
- // ARMInstPrinter::printAddrMode4Operand() prints special mode string
- // if the base register is SP; so don't set ARM::SP.
- MI.addOperand(MCOperand::CreateReg(0));
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
@@ -807,9 +793,8 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Misc. Branch Instructions.
-// BR_JTadd, BR_JTr, BR_JTm
// BLXr9, BXr9
-// BRIND, BX_RET
+// BX, BX_RET
static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -820,12 +805,12 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
- // BX_RET has only two predicate operands, do an early return.
- if (Opcode == ARM::BX_RET)
+ // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+ if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
return true;
- // BLXr9 and BRIND take one GPR reg.
- if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) {
+ // BLXr9 and BX take one GPR reg.
+ if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) {
assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -834,72 +819,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- // BR_JTadd is an ADD with Rd = PC, (Rn, Rm) as the target and index regs.
- if (Opcode == ARM::BR_JTadd) {
- // InOperandList with GPR:$target and GPR:$idx regs.
-
- assert(NumOps == 4 && "Expect 4 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- // Fill in the two remaining imm operands to signify build completion.
- MI.addOperand(MCOperand::CreateImm(0));
- MI.addOperand(MCOperand::CreateImm(0));
-
- OpIdx = 4;
- return true;
- }
-
- // BR_JTr is a MOV with Rd = PC, and Rm as the source register.
- if (Opcode == ARM::BR_JTr) {
- // InOperandList with GPR::$target reg.
-
- assert(NumOps == 3 && "Expect 3 operands");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- // Fill in the two remaining imm operands to signify build completion.
- MI.addOperand(MCOperand::CreateImm(0));
- MI.addOperand(MCOperand::CreateImm(0));
-
- OpIdx = 3;
- return true;
- }
-
- // BR_JTm is an LDR with Rt = PC.
- if (Opcode == ARM::BR_JTm) {
- // This is the reg/reg form, with base reg followed by +/- reg shop imm.
- // See also ARMAddressingModes.h (Addressing Mode #2).
-
- assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1");
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
-
- // Disassemble the offset reg (Rm), shift type, and immediate shift length.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
- // Inst{6-5} encodes the shift opcode.
- ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5));
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShImm = slice(insn, 11, 7);
-
- // A8.4.1. Possible rrx or shift amount of 32...
- getImmShiftSE(ShOp, ShImm);
- MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
-
- // Fill in the two remaining imm operands to signify build completion.
- MI.addOperand(MCOperand::CreateImm(0));
- MI.addOperand(MCOperand::CreateImm(0));
-
- OpIdx = 5;
- return true;
- }
-
return false;
}
@@ -1324,30 +1243,28 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
-
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
+ NumOpsAdded = 0;
unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base, if necessary.
- if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) {
+ if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD ||
+ Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD ||
+ Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD ||
+ Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) {
MI.addOperand(MCOperand::CreateReg(Base));
- ++OpIdx;
+ ++NumOpsAdded;
}
+ // Add the base register operand.
MI.addOperand(MCOperand::CreateReg(Base));
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
// Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift;
MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- OpIdx += 4;
+ NumOpsAdded += 3;
// Fill the variadic part of reglist.
unsigned RegListBits = insn & ((1 << 16) - 1);
@@ -1355,7 +1272,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if ((RegListBits >> i) & 1) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
i)));
- ++OpIdx;
+ ++NumOpsAdded;
}
}
@@ -1586,8 +1503,7 @@ static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
}
// A7.5.1
-#if 0
-static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
+static APInt VFPExpandImm(unsigned char byte, unsigned N) {
assert(N == 32 || N == 64);
uint64_t Result;
@@ -1602,13 +1518,12 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) {
Result = (uint64_t)slice(byte, 7, 7) << 63 |
(uint64_t)slice(byte, 5, 0) << 48;
if (bit6)
- Result |= 0xffL << 54;
+ Result |= 0xffULL << 54;
else
- Result |= 0x1L << 62;
+ Result |= 0x1ULL << 62;
}
- return Result;
+ return APInt(N, Result);
}
-#endif
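+
+// Worked example for the N == 64 path above: the encoded byte 0x70 has sign 0,
+// bit6 == 1 and bits 5-0 == 0b110000, so Result ends up 0x3FF0000000000000,
+// i.e. the IEEE-754 double 1.0 (the immediate that a "vmov.f64 d0, #1.0"
+// style FCONSTD carries).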
// VFP Unary Format Instructions:
//
@@ -1902,8 +1817,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base, if necessary.
- if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMD_UPD || Opcode == ARM::VSTMS_UPD) {
+ if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD ||
+ Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD ||
+ Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) {
MI.addOperand(MCOperand::CreateReg(Base));
++OpIdx;
}
@@ -1926,8 +1843,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx += 4;
- bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
+ bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB ||
+ Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
+ Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB ||
+ Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd.
@@ -1985,10 +1904,14 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Extract/decode the f64/f32 immediate.
if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
- // The asm syntax specifies the before-expanded <imm>.
- // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
- // Opcode == ARM::FCONSTD ? 64 : 32)
- MI.addOperand(MCOperand::CreateImm(slice(insn,19,16)<<4 | slice(insn,3,0)));
+ // The asm syntax specifies the floating point value, not the 8-bit literal.
+ APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
+ Opcode == ARM::FCONSTD ? 64 : 32);
+ APFloat immFP = APFloat(immRaw, true);
+ double imm = Opcode == ARM::FCONSTD ? immFP.convertToDouble() :
+ immFP.convertToFloat();
+ MI.addOperand(MCOperand::CreateFPImm(imm));
+
++OpIdx;
}
@@ -2201,22 +2124,6 @@ static unsigned decodeN3VImm(uint32_t insn) {
return (insn >> 8) & 0xF;
}
-static bool UseDRegPair(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- return true;
- }
-}
-
// VLD*
// D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
// VLD*LN*
@@ -2243,10 +2150,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have homogeneous NEON registers for Load/Store.
unsigned RegClass = 0;
- bool DRegPair = UseDRegPair(Opcode);
// Double-spaced registers have increments of 2.
- unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
+ unsigned Inc = DblSpaced ? 2 : 1;
unsigned Rn = decodeRn(insn);
unsigned Rm = decodeRm(insn);
@@ -2292,7 +2198,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd, DRegPair)));
+ getRegisterEnum(B, RegClass, Rd)));
Rd += Inc;
++OpIdx;
}
@@ -2311,7 +2217,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd, DRegPair)));
+ getRegisterEnum(B, RegClass, Rd)));
Rd += Inc;
++OpIdx;
}
@@ -2771,8 +2677,8 @@ static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode,
return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
N3V_VectorShift, B);
}
-static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded,
N3V_VectorExtract, B);
@@ -2959,9 +2865,9 @@ static inline bool MemBarrierInstr(uint32_t insn) {
static inline bool PreLoadOpcode(unsigned Opcode) {
switch(Opcode) {
- case ARM::PLDi: case ARM::PLDr:
- case ARM::PLDWi: case ARM::PLDWr:
- case ARM::PLIi: case ARM::PLIr:
+ case ARM::PLDi12: case ARM::PLDrs:
+ case ARM::PLDWi12: case ARM::PLDWrs:
+ case ARM::PLIi12: case ARM::PLIrs:
return true;
default:
return false;
@@ -2971,18 +2877,21 @@ static inline bool PreLoadOpcode(unsigned Opcode) {
static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- // Preload Data/Instruction requires either 2 or 4 operands.
- // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1')
- // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc
+ // Preload Data/Instruction requires either 2 or 3 operands.
+ // PLDi, PLDWi, PLIi: addrmode_imm12
+ // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
- if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) {
+ if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12
+ || Opcode == ARM::PLIi12) {
unsigned Imm12 = slice(insn, 11, 0);
bool Negative = getUBit(insn) == 0;
- int Offset = Negative ? -1 - Imm12 : 1 * Imm12;
- MI.addOperand(MCOperand::CreateImm(Offset));
+ // -0 is represented specially. All other values are as normal.
+ if (Imm12 == 0 && Negative)
+ Imm12 = INT32_MIN;
+ MI.addOperand(MCOperand::CreateImm(Imm12));
NumOpsAdded = 2;
} else {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -3026,22 +2935,36 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
case ARM::WFE:
case ARM::WFI:
case ARM::SEV:
- case ARM::SETENDBE:
- case ARM::SETENDLE:
return true;
default:
break;
}
- // CPS has a singleton $opt operand that contains the following information:
- // opt{4-0} = mode from Inst{4-0}
- // opt{5} = changemode from Inst{17}
- // opt{8-6} = AIF from Inst{8-6}
- // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
- if (Opcode == ARM::CPS) {
- unsigned Option = slice(insn, 4, 0) | slice(insn, 17, 17) << 5 |
- slice(insn, 8, 6) << 6 | slice(insn, 19, 18) << 9;
- MI.addOperand(MCOperand::CreateImm(Option));
+ if (Opcode == ARM::SETEND) {
+ NumOpsAdded = 1;
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9)));
+ return true;
+ }
+
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
NumOpsAdded = 1;
return true;
}
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 112817b..23372e0 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -564,6 +564,38 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
// t_addrmode_sp := sp + imm8 * 4
//
+// A8.6.63 LDRB (literal)
+// A8.6.79 LDRSB (literal)
+// A8.6.75 LDRH (literal)
+// A8.6.83 LDRSH (literal)
+// A8.6.59 LDR (literal)
+//
+// These instrs calculate an address from the PC value and an immediate offset.
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
+
+ assert(NumOps >= 2 &&
+ OpInfo[0].RegClass == ARM::GPRRegClassID &&
+ OpInfo[1].RegClass < 0 &&
+ "Expect >= 2 operands, first as reg, and second as imm operand");
+
+ // Build the register operand, followed by the (+/-)imm12 immediate.
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+ NumOpsAdded = 2;
+
+ return true;
+}
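+
+// Illustration: this helper is reached from DisassembleThumb2LdSt below when
+// Rn == 15, e.g. for a PC-relative literal load such as "ldr.w r0, [pc, #8]";
+// only the destination register and the immediate operand are materialized.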
+
+
// A6.2.4 Load/store single data item
//
// Load/Store Register (reg|imm): tRd tRn imm5 tRm
@@ -796,14 +828,13 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// CPS has a singleton $opt operand that contains the following information:
- // opt{4-0} = don't care
- // opt{5} = 0 (false)
- // opt{8-6} = AIF from Inst{2-0}
- // opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
+  // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+  // but in Thumb it is 0 as enable and 1 as disable, so map it onto the ARM
+  // encoding. The second operand gets the AIF flags from Inst{2-0}.
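+  // E.g. Thumb "cpsid i" has Inst{4} == 1 and Inst{2-0} == 0b010, so the two
+  // operands added below are 2 + 1 = 3 (disable) and 0b010 (the I flag).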
if (Opcode == ARM::tCPS) {
- unsigned Option = slice(insn, 2, 0) << 6 | slice(insn, 4, 4) << 9 | 1 << 10;
- MI.addOperand(MCOperand::CreateImm(Option));
- NumOpsAdded = 1;
+ MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+ NumOpsAdded = 2;
return true;
}
@@ -833,40 +864,32 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.53 LDM / LDMIA
// A8.6.189 STM / STMIA
//
-// tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD ||
- Opcode == ARM::tSTM_UPD) && "Unexpected opcode");
-
- unsigned &OpIdx = NumOpsAdded;
+ uint32_t insn, unsigned short NumOps,
+ unsigned &NumOpsAdded, BO B) {
+ assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+ Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
unsigned tRt = getT1tRt(insn);
-
- OpIdx = 0;
+ NumOpsAdded = 0;
// WB register, if necessary.
- if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) {
+ if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
tRt)));
- ++OpIdx;
+ ++NumOpsAdded;
}
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
tRt)));
- ++OpIdx;
-
- // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1
- // A8.6.53 STM / STMIA / STMEA - Encoding T1
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)));
- ++OpIdx;
+ ++NumOpsAdded;
// Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- OpIdx += 2;
- else {
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
DEBUG(errs() << "Expected predicate operands not found.\n");
return false;
}
@@ -874,13 +897,12 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
unsigned RegListBits = slice(insn, 7, 0);
// Fill the variadic part of reglist.
- for (unsigned i = 0; i < 8; ++i) {
+ for (unsigned i = 0; i < 8; ++i)
if ((RegListBits >> i) & 1) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
i)));
- ++OpIdx;
+ ++NumOpsAdded;
}
- }
return true;
}
@@ -959,22 +981,23 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
// corresponding to op.
//
// Table A6-1 16-bit Thumb instruction encoding (abridged)
-// op Instruction or instruction class
-// ------ --------------------------------------------------------------------
-// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
-// 010000 Data-processing on page A6-8
-// 010001 Special data instructions and branch and exchange on page A6-9
-// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
-// 0101xx Load/store single data item on page A6-10
+// op Instruction or instruction class
+// ------ --------------------------------------------------------------------
+// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000 Data-processing on page A6-8
+// 010001 Special data instructions and branch and exchange on page A6-9
+// 01001x Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx Load/store single data item on page A6-10
// 011xxx
// 100xxx
-// 10100x Generate PC-relative address, see ADR on page A8-32
-// 10101x Generate SP-relative address, see ADD (SP plus immediate) on page A8-28
-// 1011xx Miscellaneous 16-bit instructions on page A6-11
-// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
-// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
-// 1101xx Conditional branch, and Supervisor Call on page A6-13
-// 11100x Unconditional Branch, see B on page A8-44
+// 10100x Generate PC-relative address, see ADR on page A8-32
+// 10101x Generate SP-relative address, see ADD (SP plus immediate) on
+// page A8-28
+// 1011xx Miscellaneous 16-bit instructions on page A6-11
+// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a
+// 1101xx Conditional branch, and Supervisor Call on page A6-13
+// 11100x Unconditional Branch, see B on page A8-44
//
static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1121,34 +1144,31 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Thumb2RFEOpcode(Opcode))
return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD ||
- Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD)
+ assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD ||
+ Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
+ Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
&& "Unexpected opcode");
assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
- unsigned &OpIdx = NumOpsAdded;
-
- OpIdx = 0;
+ NumOpsAdded = 0;
unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
// Writeback to base.
- if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) {
+ if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD ||
+ Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) {
MI.addOperand(MCOperand::CreateReg(Base));
- ++OpIdx;
+ ++NumOpsAdded;
}
MI.addOperand(MCOperand::CreateReg(Base));
- ++OpIdx;
-
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
- ++OpIdx;
+ ++NumOpsAdded;
// Handling the two predicate operands before the reglist.
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
- OpIdx += 2;
- else {
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) {
+ NumOpsAdded += 2;
+ } else {
DEBUG(errs() << "Expected predicate operands not found.\n");
return false;
}
@@ -1156,13 +1176,12 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned RegListBits = insn & ((1 << 16) - 1);
// Fill the variadic part of reglist.
- for (unsigned i = 0; i < 16; ++i) {
+ for (unsigned i = 0; i < 16; ++i)
if ((RegListBits >> i) & 1) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
i)));
- ++OpIdx;
+ ++NumOpsAdded;
}
- }
return true;
}
@@ -1260,13 +1279,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
return true;
}
-// PC-based defined for Codegen, which do not get decoded by design:
-//
-// t2TBB, t2TBH: Rm immDontCare immDontCare
-//
-// Generic version defined for disassembly:
-//
-// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR
+// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR
static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1401,7 +1414,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
//
// Two register operands: Rs Rn ModImm
// One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm
-// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - {t2MOVi, t2MVNi}
+// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm -
+// {t2MOVi, t2MVNi}
//
// ModImm = ThumbExpandImm(i:imm3:imm8)
static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
@@ -1644,15 +1658,25 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
break;
}
- // CPS has a singleton $opt operand that contains the following information:
- // opt{4-0} = mode from Inst{4-0}
- // opt{5} = changemode from Inst{8}
- // opt{8-6} = AIF from Inst{7-5}
- // opt{10-9} = imod from Inst{10-9} with 0b10 as enable and 0b11 as disable
- if (Opcode == ARM::t2CPS) {
- unsigned Option = slice(insn, 4, 0) | slice(insn, 8, 8) << 5 |
- slice(insn, 7, 5) << 6 | slice(insn, 10, 9) << 9;
- MI.addOperand(MCOperand::CreateImm(Option));
+ // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different
+ // opcodes which match the same real instruction. This is needed since there's
+ // no current handling of optional arguments. Fix here when a better handling
+ // of optional arguments is implemented.
+ if (Opcode == ARM::t2CPS3p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
+ NumOpsAdded = 3;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS2p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9))); // imod
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5))); // iflags
+ NumOpsAdded = 2;
+ return true;
+ }
+ if (Opcode == ARM::t2CPS1p) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
NumOpsAdded = 1;
return true;
}
@@ -1678,11 +1702,13 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
NumOpsAdded = 1;
return true;
}
- // MSR and MSRsys take one GPR reg Rn, followed by the mask.
- if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) {
+  // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+ if (Opcode == ARM::t2MSR) {
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+ slice(insn, 11, 8) /* Special Reg */));
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
NumOpsAdded = 2;
return true;
}
@@ -1728,12 +1754,12 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
switch (Opcode) {
default:
return false;
- case ARM::t2PLDi12: case ARM::t2PLDi8: case ARM::t2PLDpci:
- case ARM::t2PLDr: case ARM::t2PLDs:
- case ARM::t2PLDWi12: case ARM::t2PLDWi8: case ARM::t2PLDWpci:
- case ARM::t2PLDWr: case ARM::t2PLDWs:
- case ARM::t2PLIi12: case ARM::t2PLIi8: case ARM::t2PLIpci:
- case ARM::t2PLIr: case ARM::t2PLIs:
+ case ARM::t2PLDi12: case ARM::t2PLDi8:
+ case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ case ARM::t2PLIi12: case ARM::t2PLIi8:
+ case ARM::t2PLIs:
return true;
}
}
@@ -1769,11 +1795,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
int Offset = 0;
- if (Opcode == ARM::t2PLDpci || Opcode == ARM::t2PLDWpci ||
- Opcode == ARM::t2PLIpci) {
+ if (slice(insn, 19, 16) == 0xFF) {
bool Negative = slice(insn, 23, 23) == 0;
unsigned Imm12 = getImm12(insn);
- Offset = Negative ? -1 - Imm12 : 1 * Imm12;
+ Offset = Negative ? -1 - Imm12 : 1 * Imm12;
} else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
Opcode == ARM::t2PLIi8) {
// A8.6.117 Encoding T2: add = FALSE
@@ -1795,37 +1820,6 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
-// A8.6.63 LDRB (literal)
-// A8.6.79 LDRSB (literal)
-// A8.6.75 LDRH (literal)
-// A8.6.83 LDRSH (literal)
-// A8.6.59 LDR (literal)
-//
-// These instrs calculate an address from the PC value and an immediate offset.
-// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
-static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
- if (!OpInfo) return false;
-
- assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass < 0 &&
- "Expect >= 2 operands, first as reg, and second as imm operand");
-
- // Build the register operand, followed by the (+/-)imm12 immediate.
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
-
- MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
-
- NumOpsAdded = 2;
-
- return true;
-}
-
// A6.3.10 Store single data item
// A6.3.9 Load byte, memory hints
// A6.3.8 Load halfword, memory hints
@@ -1835,13 +1829,15 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
//
// t2LDRi12: Rd Rn (+)imm12
// t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
// t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
//
// t2STRi12: Rd Rn (+)imm12
// t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1)
-// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg)
+// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also
+// DisassembleThumb2DPSoReg)
// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
// t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1)
//
@@ -1862,7 +1858,6 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
// See, for example, A6.3.7 Load word: Table A6-18 Load word.
if (Load && Rn == 15)
return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
-
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1909,7 +1904,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
else
Imm = decodeImm8(insn);
}
-
+
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
R0)));
++OpIdx;
@@ -2081,25 +2076,29 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
// corresponding to (op1, op2, op).
//
// Table A6-9 32-bit Thumb instruction encoding
-// op1 op2 op Instruction class, see
-// --- ------- -- ------------------------------------------------------------
-// 01 00xx0xx - Load/store multiple on page A6-23
-// 00xx1xx - Load/store dual, load/store exclusive, table branch on page A6-24
-// 01xxxxx - Data-processing (shifted register) on page A6-31
-// 1xxxxxx - Coprocessor instructions on page A6-40
-// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
-// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
-// - 1 Branches and miscellaneous control on page A6-20
-// 11 000xxx0 - Store single data item on page A6-30
-// 001xxx0 - Advanced SIMD element or structure load/store instructions on page A7-27
-// 00xx001 - Load byte, memory hints on page A6-28
-// 00xx011 - Load halfword, memory hints on page A6-26
-// 00xx101 - Load word on page A6-25
-// 00xx111 - UNDEFINED
-// 010xxxx - Data-processing (register) on page A6-33
-// 0110xxx - Multiply, multiply accumulate, and absolute difference on page A6-38
-// 0111xxx - Long multiply, long multiply accumulate, and divide on page A6-39
-// 1xxxxxx - Coprocessor instructions on page A6-40
+// op1 op2 op Instruction class, see
+// --- ------- -- -----------------------------------------------------------
+// 01 00xx0xx - Load/store multiple on page A6-23
+// 00xx1xx - Load/store dual, load/store exclusive, table branch on
+// page A6-24
+// 01xxxxx - Data-processing (shifted register) on page A6-31
+// 1xxxxxx - Coprocessor instructions on page A6-40
+// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15
+// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19
+// - 1 Branches and miscellaneous control on page A6-20
+// 11 000xxx0 - Store single data item on page A6-30
+// 001xxx0 - Advanced SIMD element or structure load/store instructions
+// on page A7-27
+// 00xx001 - Load byte, memory hints on page A6-28
+// 00xx011 - Load halfword, memory hints on page A6-26
+// 00xx101 - Load word on page A6-25
+// 00xx111 - UNDEFINED
+// 010xxxx - Data-processing (register) on page A6-33
+// 0110xxx - Multiply, multiply accumulate, and absolute difference on
+// page A6-38
+// 0111xxx - Long multiply, long multiply accumulate, and divide on
+// page A6-39
+// 1xxxxxx - Coprocessor instructions on page A6-40
//
static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps,
@@ -2130,7 +2129,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded,
B);
}
- if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) {
+ if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) {
// Table branch.
return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
@@ -2175,7 +2174,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
}
} else {
// Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
- return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B);
+ return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps,
+ NumOpsAdded, B);
}
break;
case 1:
@@ -2229,7 +2229,7 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// A6.3 32-bit Thumb instruction encoding
-
+
uint16_t op1 = slice(HalfWord, 12, 11);
uint16_t op2 = slice(HalfWord, 10, 4);
uint16_t op = slice(insn, 15, 15);
diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8026e77..1499da0 100644
--- a/contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h" // FIXME: FACTOR ENUMS BETTER.
+#include "ARMBaseInfo.h"
#include "ARMInstPrinter.h"
#include "ARMAddressingModes.h"
#include "llvm/MC/MCInst.h"
@@ -22,86 +22,20 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE.
+#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
-#undef MachineInstr
-#undef ARMAsmPrinter
-static unsigned NextReg(unsigned Reg) {
- switch (Reg) {
- default:
- assert(0 && "Unexpected register enum");
-
- case ARM::D0:
- return ARM::D1;
- case ARM::D1:
- return ARM::D2;
- case ARM::D2:
- return ARM::D3;
- case ARM::D3:
- return ARM::D4;
- case ARM::D4:
- return ARM::D5;
- case ARM::D5:
- return ARM::D6;
- case ARM::D6:
- return ARM::D7;
- case ARM::D7:
- return ARM::D8;
- case ARM::D8:
- return ARM::D9;
- case ARM::D9:
- return ARM::D10;
- case ARM::D10:
- return ARM::D11;
- case ARM::D11:
- return ARM::D12;
- case ARM::D12:
- return ARM::D13;
- case ARM::D13:
- return ARM::D14;
- case ARM::D14:
- return ARM::D15;
- case ARM::D15:
- return ARM::D16;
- case ARM::D16:
- return ARM::D17;
- case ARM::D17:
- return ARM::D18;
- case ARM::D18:
- return ARM::D19;
- case ARM::D19:
- return ARM::D20;
- case ARM::D20:
- return ARM::D21;
- case ARM::D21:
- return ARM::D22;
- case ARM::D22:
- return ARM::D23;
- case ARM::D23:
- return ARM::D24;
- case ARM::D24:
- return ARM::D25;
- case ARM::D25:
- return ARM::D26;
- case ARM::D26:
- return ARM::D27;
- case ARM::D27:
- return ARM::D28;
- case ARM::D28:
- return ARM::D29;
- case ARM::D29:
- return ARM::D30;
- case ARM::D30:
- return ARM::D31;
- }
+StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
}
+
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ unsigned Opcode = MI->getOpcode();
+
// Check for MOVs and print canonical forms, instead.
- if (MI->getOpcode() == ARM::MOVs) {
+ if (Opcode == ARM::MOVs) {
+ // FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
const MCOperand &MO2 = MI->getOperand(2);
@@ -129,118 +63,82 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
}
// A8.6.123 PUSH
- if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) &&
+ if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
- const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
- O << '\t' << "push";
- printPredicateOperand(MI, 3, O);
- O << '\t';
- printRegisterList(MI, 5, O);
- return;
- }
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
}
// A8.6.122 POP
- if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) &&
+ if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
- const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
- O << '\t' << "pop";
- printPredicateOperand(MI, 3, O);
- O << '\t';
- printRegisterList(MI, 5, O);
- return;
- }
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
}
// A8.6.355 VPUSH
- if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
+ if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
- const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
- O << '\t' << "vpush";
- printPredicateOperand(MI, 3, O);
- O << '\t';
- printRegisterList(MI, 5, O);
- return;
- }
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
}
// A8.6.354 VPOP
- if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
+ if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
- const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
- O << '\t' << "vpop";
- printPredicateOperand(MI, 3, O);
- O << '\t';
- printRegisterList(MI, 5, O);
- return;
- }
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ return;
}
printInstruction(MI, O);
- }
+}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
- if (Modifier && strcmp(Modifier, "dregpair") == 0) {
- O << '{' << getRegisterName(Reg) << ", "
- << getRegisterName(NextReg(Reg)) << '}';
-#if 0
- // FIXME: Breaks e.g. ARM/vmul.ll.
- assert(0);
- /*
- unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0);
- unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1);
- O << '{'
- << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
- << '}';*/
-#endif
- } else if (Modifier && strcmp(Modifier, "lane") == 0) {
- assert(0);
- /*
- unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
- unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
- &ARM::DPR_VFP2RegClass);
- O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
- */
- } else {
- O << getRegisterName(Reg);
- }
+ O << getRegisterName(Reg);
} else if (Op.isImm()) {
- assert((Modifier && !strcmp(Modifier, "call")) ||
- ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
O << '#' << Op.getImm();
} else {
- if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0)
- llvm_unreachable("Unsupported modifier");
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << *Op.getExpr();
}
}
-static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
+static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
const MCAsmInfo *MAI) {
// Break it up into two parts that make up a shifter immediate.
V = ARM_AM::getSOImmVal(V);
assert(V != -1 && "Not a valid so_imm value!");
-
+
unsigned Imm = ARM_AM::getSOImmValImm(V);
unsigned Rot = ARM_AM::getSOImmValRot(V);
-
+
// Print low-level immediate formation info, per
// A5.1.3: "Data-processing operands - Immediate".
if (Rot) {
O << "#" << Imm << ", " << Rot;
// Pretty printed version.
- if (VerboseAsm)
- O << ' ' << MAI->getCommentString()
- << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ if (CommentStream)
+ *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
} else {
O << "#" << Imm;
}
@@ -253,15 +151,7 @@ void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
assert(MO.isImm() && "Not a valid so_imm value!");
- printSOImm(O, MO.getImm(), VerboseAsm, &MAI);
-}
-
-/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
-/// followed by an 'orr' to materialize.
-void ARMInstPrinter::printSOImm2PartOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- // FIXME: REMOVE this method.
- abort();
+ printSOImm(O, MO.getImm(), CommentStream, &MAI);
}
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
@@ -274,9 +164,9 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
-
+
O << getRegisterName(MO1.getReg());
-
+
// Print the shift opc.
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
@@ -294,14 +184,14 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
-
+
if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
printOperand(MI, Op, O);
return;
}
-
+
O << "[" << getRegisterName(MO1.getReg());
-
+
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
O << ", #"
@@ -310,24 +200,24 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
O << "]";
return;
}
-
+
O << ", "
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
<< getRegisterName(MO2.getReg());
-
+
if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
O << ", "
<< ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
<< " #" << ShImm;
O << "]";
-}
+}
void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
+
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
O << '#'
@@ -335,10 +225,10 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
<< ImmOffs;
return;
}
-
+
O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
<< getRegisterName(MO1.getReg());
-
+
if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
O << ", "
<< ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
@@ -350,15 +240,15 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
const MCOperand &MO3 = MI->getOperand(OpNum+2);
-
+
O << '[' << getRegisterName(MO1.getReg());
-
+
if (MO2.getReg()) {
O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm())
<< getRegisterName(MO2.getReg()) << ']';
return;
}
-
+
if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
O << ", #"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
@@ -371,53 +261,42 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
+
if (MO1.getReg()) {
O << (char)ARM_AM::getAM3Op(MO2.getImm())
<< getRegisterName(MO1.getReg());
return;
}
-
+
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
O << '#'
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()))
<< ImmOffs;
}
-
-void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O,
- const char *Modifier) {
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- O << ARM_AM::getAMSubModeStr(Mode);
- } else if (Modifier && strcmp(Modifier, "wide") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
- if (Mode == ARM_AM::ia)
- O << ".w";
- } else {
- printOperand(MI, OpNum, O);
- }
+void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
+ .getImm());
+ O << ARM_AM::getAMSubModeStr(Mode);
}
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O,
- const char *Modifier) {
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
+
if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
printOperand(MI, OpNum, O);
return;
}
-
+
O << "[" << getRegisterName(MO1.getReg());
-
+
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
O << ", #"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
- << ImmOffs*4;
+ << ImmOffs * 4;
}
O << "]";
}
@@ -426,7 +305,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
-
+
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
@@ -445,12 +324,6 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
O << ", " << getRegisterName(MO.getReg());
}
-void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O,
- const char *Modifier) {
- assert(0 && "FIXME: Implement printAddrModePCOperand");
-}
-
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -497,33 +370,41 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
O << "}";
}
-void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
- unsigned option = Op.getImm();
- unsigned mode = option & 31;
- bool changemode = option >> 5 & 1;
- unsigned AIF = option >> 6 & 7;
- unsigned imod = option >> 9 & 3;
- if (imod == 2)
- O << "ie";
- else if (imod == 3)
- O << "id";
- O << '\t';
- if (imod > 1) {
- if (AIF & 4) O << 'a';
- if (AIF & 2) O << 'i';
- if (AIF & 1) O << 'f';
- if (AIF > 0 && changemode) O << ", ";
- }
- if (changemode)
- O << '#' << mode;
+ if (Op.getImm())
+ O << "be";
+ else
+ O << "le";
+}
+
+void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ O << ARM_PROC::IModToString(Op.getImm());
+}
+
+void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ unsigned IFlags = Op.getImm();
+ for (int i=2; i >= 0; --i)
+ if (IFlags & (1 << i))
+ O << ARM_PROC::IFlagsToString(1 << i);
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
- unsigned Mask = Op.getImm();
+ unsigned SpecRegRBit = Op.getImm() >> 4;
+ unsigned Mask = Op.getImm() & 0xf;
+
+ if (SpecRegRBit)
+ O << "spsr";
+ else
+ O << "cpsr";
+
if (Mask) {
O << '_';
if (Mask & 8) O << 'f';
@@ -550,7 +431,7 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
O << ARMCondCodeToString(CC);
}
-void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
@@ -566,25 +447,24 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
}
}
-
-
-void ARMInstPrinter::printCPInstOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O,
- const char *Modifier) {
- // FIXME: remove this.
- abort();
-}
-
void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << MI->getOperand(OpNum).getImm();
}
+void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "p" << MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "c" << MI->getOperand(OpNum).getImm();
+}
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- // FIXME: remove this.
- abort();
+ llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
@@ -611,17 +491,25 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
O << "[" << getRegisterName(MO1.getReg());
- O << ", " << getRegisterName(MO2.getReg()) << "]";
+ if (unsigned RegNum = MO2.getReg())
+ O << ", " << getRegisterName(RegNum);
+ O << "]";
}
-void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op,
- raw_ostream &O,
- unsigned Scale) {
+void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O,
+ unsigned Scale) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
printOperand(MI, Op, O);
@@ -629,44 +517,32 @@ void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op,
}
O << "[" << getRegisterName(MO1.getReg());
- if (MO3.getReg())
- O << ", " << getRegisterName(MO3.getReg());
- else if (unsigned ImmOffs = MO2.getImm())
+ if (unsigned ImmOffs = MO2.getImm())
O << ", #" << ImmOffs * Scale;
O << "]";
}
-void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 1);
+void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 1);
}
-void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 2);
+void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 2);
}
-void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- printThumbAddrModeRI5Operand(MI, Op, O, 4);
+void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
+ unsigned Op,
+ raw_ostream &O) {
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
}
void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- O << "[" << getRegisterName(MO1.getReg());
- if (unsigned ImmOffs = MO2.getImm())
- O << ", #" << ImmOffs*4;
- O << "]";
-}
-
-void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg());
- if (MI->getOpcode() == ARM::t2TBH)
- O << ", lsl #1";
- O << ']';
+ printThumbAddrModeImm5SOperand(MI, Op, O, 4);
}
// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
@@ -689,16 +565,26 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
-void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
O << "[" << getRegisterName(MO1.getReg());
- unsigned OffImm = MO2.getImm();
- if (OffImm) // Don't print +0.
+ int32_t OffImm = (int32_t)MO2.getImm();
+ bool isSub = OffImm < 0;
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ if (isSub)
+ O << ", #-" << -OffImm;
+ else if (OffImm > 0)
O << ", #" << OffImm;
O << "]";
}
@@ -783,12 +669,37 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << '#' << MI->getOperand(OpNum).getImm();
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << (float)MO.getFPImm();
+ } else {
+ union {
+ uint32_t I;
+ float F;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.F;
+ }
}
void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- O << '#' << MI->getOperand(OpNum).getImm();
+ const MCOperand &MO = MI->getOperand(OpNum);
+ O << '#';
+ if (MO.isFPImm()) {
+ O << MO.getFPImm();
+ } else {
+ // We expect the binary encoding of a floating point number here.
+ union {
+ uint64_t I;
+ double D;
+ } FPUnion;
+
+ FPUnion.I = MO.getImm();
+ O << FPUnion.D;
+ }
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
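A note on the shifter-immediate printing change above: per ARM ARM A5.1.3, an so_imm is an 8-bit value rotated right within a 32-bit word, and the value now sent to the CommentStream is simply that rotated result. The standalone sketch below shows the same computation; rotr32 here is an assumption standing in for ARM_AM::rotr32, which is not part of this diff.

// Minimal sketch of how the pretty-printed comment value for "#Imm, Rot"
// is recovered (hypothetical helper mirroring what ARM_AM::rotr32 computes).
#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

int main() {
  unsigned Imm = 0xFF, Rot = 16;                // printed as "#255, 16"
  std::printf("%d\n", (int)rotr32(Imm, Rot));   // comment stream gets 16711680
  return 0;
}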
diff --git a/contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index e5ad0d0..679d313 100644
--- a/contrib/llvm/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -18,26 +18,25 @@
namespace llvm {
class MCOperand;
-
+
class ARMInstPrinter : public MCInstPrinter {
- bool VerboseAsm;
public:
- ARMInstPrinter(const MCAsmInfo &MAI, bool verboseAsm)
- : MCInstPrinter(MAI), VerboseAsm(verboseAsm) {}
+ ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
-
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
-
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSOImm2PartOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
+
void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
@@ -45,15 +44,11 @@ public:
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printAddrMode4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- const char *Modifier = 0);
- void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- const char *Modifier = 0);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
@@ -64,20 +59,20 @@ public:
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, unsigned Scale);
- void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, unsigned Scale);
+ void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
-
+
void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
@@ -88,7 +83,10 @@ public:
raw_ostream &O);
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
-
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -98,21 +96,16 @@ public:
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPInstOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- const char *Modifier);
- void printJTBlockOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {}
- void printJT2BlockOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {}
- void printTBAddrMode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- // FIXME: Implement.
- void PrintSpecial(const MCInst *MI, raw_ostream &O, const char *Kind) {}
+ void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
-
-}
+
+} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..18645c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+ ARMInstPrinter.cpp
+ )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 0000000..65d372e
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 0000000..f9e86eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,321 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiply and add / sub instructions) when special VMLx hazards are detected.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mlx-expansion"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ForceExpand("expand-all-fp-mlx", cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
+
+STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
+
+namespace {
+ struct MLxExpansion : public MachineFunctionPass {
+ static char ID;
+ MLxExpansion() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM MLA / MLS expansion pass";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ unsigned MIIdx;
+ MachineInstr* LastMIs[4];
+
+ void clearStack();
+ void pushStack(MachineInstr *MI);
+ MachineInstr *getAccDefMI(MachineInstr *MI) const;
+ unsigned getDefReg(MachineInstr *MI) const;
+ bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI) const;
+ void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane);
+ bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
+ };
+ char MLxExpansion::ID = 0;
+}
+
+void MLxExpansion::clearStack() {
+ std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ MIIdx = 0;
+}
+
+void MLxExpansion::pushStack(MachineInstr *MI) {
+ LastMIs[MIIdx] = MI;
+ if (++MIIdx == 4)
+ MIIdx = 0;
+}
+
+MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
+ // Look past COPY and INSERT_SUBREG instructions to find the
+ // real definition MI. This is important for _sfp instructions.
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ while (true) {
+ if (DefMI->getParent() != MBB)
+ break;
+ if (DefMI->isCopyLike()) {
+ Reg = DefMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ } else if (DefMI->isInsertSubreg()) {
+ Reg = DefMI->getOperand(2).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DefMI = MRI->getVRegDef(Reg);
+ continue;
+ }
+ }
+ break;
+ }
+ return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+
+ while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+ Reg = UseMI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->hasOneNonDBGUse(Reg))
+ return Reg;
+ UseMI = &*MRI->use_nodbg_begin(Reg);
+ if (UseMI->getParent() != MBB)
+ return Reg;
+ }
+
+ return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ // FIXME: Detect integer instructions properly.
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainVFP) {
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+ Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ } else if (Domain == ARMII::DomainNEON) {
+ if (TID.mayStore() || TID.mayLoad())
+ return false;
+ } else {
+ return false;
+ }
+
+  return MI->readsRegister(Reg, TRI);
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+ if (NumExpand >= ExpandLimit)
+ return false;
+
+  if (ForceExpand)
+ return true;
+
+ MachineInstr *DefMI = getAccDefMI(MI);
+ if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+ // r0 = vmla
+ // r3 = vmla r0, r1, r2
+ // takes 16 - 17 cycles
+ //
+ // r0 = vmla
+ // r4 = vmul r1, r2
+ // r3 = vadd r0, r4
+ // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ return true;
+
+  // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
+  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4-cycle stall
+  // preserves the in-order retirement of the instructions.
+  // Look at the next few instructions; if *most* of them can cause hazards,
+  // the scheduler can't *fix* this, so we'd better break up the VMLA.
+ for (unsigned i = 1; i <= 4; ++i) {
+ int Idx = ((int)MIIdx - i + 4) % 4;
+ MachineInstr *NextMI = LastMIs[Idx];
+ if (!NextMI)
+ continue;
+
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
+ return true;
+
+ // Look for VMLx RAW hazard.
+ if (hasRAWHazard(getDefReg(MI), NextMI))
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
+void
+MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+ unsigned MulOpc, unsigned AddSubOpc,
+ bool NegAcc, bool HasLane) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool DstDead = MI->getOperand(0).isDead();
+ unsigned AccReg = MI->getOperand(1).getReg();
+ unsigned Src1Reg = MI->getOperand(2).getReg();
+ unsigned Src2Reg = MI->getOperand(3).getReg();
+ bool Src1Kill = MI->getOperand(2).isKill();
+ bool Src2Kill = MI->getOperand(3).isKill();
+ unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
+ unsigned NextOp = HasLane ? 5 : 4;
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
+ unsigned PredReg = MI->getOperand(++NextOp).getReg();
+
+ const TargetInstrDesc &TID1 = TII->get(MulOpc);
+ const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
+ .addReg(Src1Reg, getKillRegState(Src1Kill))
+ .addReg(Src2Reg, getKillRegState(Src2Kill));
+ if (HasLane)
+ MIB.addImm(LaneImm);
+ MIB.addImm(Pred).addReg(PredReg);
+
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
+
+ if (NegAcc) {
+ bool AccKill = MRI->hasOneNonDBGUse(AccReg);
+ MIB.addReg(TmpReg, getKillRegState(true))
+ .addReg(AccReg, getKillRegState(AccKill));
+ } else {
+ MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
+ }
+ MIB.addImm(Pred).addReg(PredReg);
+
+ DEBUG({
+ dbgs() << "Expanding: " << *MI;
+ dbgs() << " to:\n";
+ MachineBasicBlock::iterator MII = MI;
+ MII = llvm::prior(MII);
+ MachineInstr &MI2 = *MII;
+ MII = llvm::prior(MII);
+ MachineInstr &MI1 = *MII;
+ dbgs() << " " << MI1;
+ dbgs() << " " << MI2;
+ });
+
+ MI->eraseFromParent();
+ ++NumExpand;
+}
+
+bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ clearStack();
+
+ unsigned Skip = 0;
+ MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
+ while (MII != E) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) {
+ ++MII;
+ continue;
+ }
+
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.isBarrier()) {
+ clearStack();
+ Skip = 0;
+ ++MII;
+ continue;
+ }
+
+ unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ if (Domain == ARMII::DomainGeneral) {
+ if (++Skip == 2)
+        // Assume dual issue of non-VFP / NEON instructions.
+ pushStack(0);
+ } else {
+ Skip = 0;
+
+ unsigned MulOpc, AddSubOpc;
+ bool NegAcc, HasLane;
+ if (!TII->isFpMLxInstruction(TID.getOpcode(),
+ MulOpc, AddSubOpc, NegAcc, HasLane) ||
+ !FindMLxHazard(MI))
+ pushStack(MI);
+ else {
+ ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
+ E = MBB.rend(); // May have changed if MI was the 1st instruction.
+ Changed = true;
+ continue;
+ }
+ }
+
+ ++MII;
+ }
+
+ return Changed;
+}
+
+bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= ExpandFPMLxInstructions(MBB);
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createMLxExpansionPass() {
+ return new MLxExpansion();
+}
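As a reading aid for the pass above: MLxExpansion scans each block bottom-up and remembers the last few relevant instructions in a fixed four-slot ring (LastMIs/MIIdx), which FindMLxHazard then walks newest-first. Below is a simplified standalone model of that bookkeeping, not the LLVM code itself.

// Simplified model of the 4-slot circular history used by MLxExpansion.
struct InstrHistory {
  const void *Last[4] = {};   // most recent instructions; oldest is overwritten
  unsigned Idx = 0;           // next slot to overwrite

  void clear() {              // mirrors MLxExpansion::clearStack
    for (const void *&P : Last)
      P = nullptr;
    Idx = 0;
  }
  void push(const void *MI) { // mirrors MLxExpansion::pushStack
    Last[Idx] = MI;
    if (++Idx == 4)
      Idx = 0;
  }
  const void *recent(unsigned i) const { // i == 1 is the newest entry
    return Last[(int(Idx) - int(i) + 4) % 4];
  }
};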
diff --git a/contrib/llvm/lib/Target/ARM/NEONPreAllocPass.cpp b/contrib/llvm/lib/Target/ARM/NEONPreAllocPass.cpp
deleted file mode 100644
index 3407ac6..0000000
--- a/contrib/llvm/lib/Target/ARM/NEONPreAllocPass.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "neon-prealloc"
-#include "ARM.h"
-#include "ARMInstrInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-using namespace llvm;
-
-namespace {
- class NEONPreAllocPass : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
-
- public:
- static char ID;
- NEONPreAllocPass() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "NEON register pre-allocation pass";
- }
-
- private:
- bool FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs,
- unsigned Offset, unsigned Stride) const;
- bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
- };
-
- char NEONPreAllocPass::ID = 0;
-}
-
-static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
- unsigned &Offset, unsigned &Stride) {
- // Default to unit stride with no offset.
- Stride = 1;
- Offset = 0;
-
- switch (Opcode) {
- default:
- break;
-
- case ARM::VLD2LNd8:
- case ARM::VLD2LNd16:
- case ARM::VLD2LNd32:
- FirstOpnd = 0;
- NumRegs = 2;
- return true;
-
- case ARM::VLD2LNq16:
- case ARM::VLD2LNq32:
- FirstOpnd = 0;
- NumRegs = 2;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD2LNq16odd:
- case ARM::VLD2LNq32odd:
- FirstOpnd = 0;
- NumRegs = 2;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VLD3LNd8:
- case ARM::VLD3LNd16:
- case ARM::VLD3LNd32:
- FirstOpnd = 0;
- NumRegs = 3;
- return true;
-
- case ARM::VLD3LNq16:
- case ARM::VLD3LNq32:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3LNq16odd:
- case ARM::VLD3LNq32odd:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VLD4LNd8:
- case ARM::VLD4LNd16:
- case ARM::VLD4LNd32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
- case ARM::VLD4LNq16:
- case ARM::VLD4LNq32:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4LNq16odd:
- case ARM::VLD4LNq32odd:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST2LNd8:
- case ARM::VST2LNd16:
- case ARM::VST2LNd32:
- FirstOpnd = 2;
- NumRegs = 2;
- return true;
-
- case ARM::VST2LNq16:
- case ARM::VST2LNq32:
- FirstOpnd = 2;
- NumRegs = 2;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST2LNq16odd:
- case ARM::VST2LNq32odd:
- FirstOpnd = 2;
- NumRegs = 2;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST3LNd8:
- case ARM::VST3LNd16:
- case ARM::VST3LNd32:
- FirstOpnd = 2;
- NumRegs = 3;
- return true;
-
- case ARM::VST3LNq16:
- case ARM::VST3LNq32:
- FirstOpnd = 2;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3LNq16odd:
- case ARM::VST3LNq32odd:
- FirstOpnd = 2;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST4LNd8:
- case ARM::VST4LNd16:
- case ARM::VST4LNd32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
- case ARM::VST4LNq16:
- case ARM::VST4LNq32:
- FirstOpnd = 2;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4LNq16odd:
- case ARM::VST4LNq32odd:
- FirstOpnd = 2;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VTBL2:
- FirstOpnd = 1;
- NumRegs = 2;
- return true;
-
- case ARM::VTBL3:
- FirstOpnd = 1;
- NumRegs = 3;
- return true;
-
- case ARM::VTBL4:
- FirstOpnd = 1;
- NumRegs = 4;
- return true;
-
- case ARM::VTBX2:
- FirstOpnd = 2;
- NumRegs = 2;
- return true;
-
- case ARM::VTBX3:
- FirstOpnd = 2;
- NumRegs = 3;
- return true;
-
- case ARM::VTBX4:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
- }
-
- return false;
-}
-
-bool
-NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs,
- unsigned Offset, unsigned Stride) const {
- MachineOperand &FMO = MI->getOperand(FirstOpnd);
- assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = FMO.getReg();
- (void)VirtReg;
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
-
- unsigned LastSubIdx = 0;
- if (FMO.isDef()) {
- MachineInstr *RegSeq = 0;
- for (unsigned R = 0; R < NumRegs; ++R) {
- const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
- // Feeding into a REG_SEQUENCE.
- if (!MRI->hasOneNonDBGUse(VirtReg))
- return false;
- MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
- if (!UseMI->isRegSequence())
- return false;
- if (RegSeq && RegSeq != UseMI)
- return false;
- unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
- if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
- llvm_unreachable("Malformed REG_SEQUENCE instruction!");
- unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
- if (LastSubIdx) {
- if (LastSubIdx != SubIdx-Stride)
- return false;
- } else {
- // Must start from dsub_0 or qsub_0.
- if (SubIdx != (ARM::dsub_0+Offset) &&
- SubIdx != (ARM::qsub_0+Offset))
- return false;
- }
- RegSeq = UseMI;
- LastSubIdx = SubIdx;
- }
-
- // In the case of vld3, etc., make sure the trailing operand of
- // REG_SEQUENCE is an undef.
- if (NumRegs == 3) {
- unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
- const MachineOperand &MO = RegSeq->getOperand(OpIdx);
- unsigned VirtReg = MO.getReg();
- MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
- if (!DefMI || !DefMI->isImplicitDef())
- return false;
- }
- return true;
- }
-
- unsigned LastSrcReg = 0;
- SmallVector<unsigned, 4> SubIds;
- for (unsigned R = 0; R < NumRegs; ++R) {
- const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
- // Extracting from a Q or QQ register.
- MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
- if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
- return false;
- VirtReg = DefMI->getOperand(1).getReg();
- if (LastSrcReg && LastSrcReg != VirtReg)
- return false;
- LastSrcReg = VirtReg;
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- if (RC != ARM::QPRRegisterClass &&
- RC != ARM::QQPRRegisterClass &&
- RC != ARM::QQQQPRRegisterClass)
- return false;
- unsigned SubIdx = DefMI->getOperand(1).getSubReg();
- if (LastSubIdx) {
- if (LastSubIdx != SubIdx-Stride)
- return false;
- } else {
- // Must start from dsub_0 or qsub_0.
- if (SubIdx != (ARM::dsub_0+Offset) &&
- SubIdx != (ARM::qsub_0+Offset))
- return false;
- }
- SubIds.push_back(SubIdx);
- LastSubIdx = SubIdx;
- }
-
- // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
- // currently required for correctness. e.g.
- // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
- // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
- // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
- // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
- // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
- // respectively.
- // We need to change how we model uses of REG_SEQUENCE.
- for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- unsigned OldReg = MO.getReg();
- MachineInstr *DefMI = MRI->getVRegDef(OldReg);
- assert(DefMI->isCopy());
- MO.setReg(LastSrcReg);
- MO.setSubReg(SubIds[R]);
- MO.setIsKill(false);
- // Delete the EXTRACT_SUBREG if its result is now dead.
- if (MRI->use_empty(OldReg))
- DefMI->eraseFromParent();
- }
-
- return true;
-}
-
-bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
- bool Modified = false;
-
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- for (; MBBI != E; ++MBBI) {
- MachineInstr *MI = &*MBBI;
- unsigned FirstOpnd, NumRegs, Offset, Stride;
- if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
- continue;
- if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
- continue;
-
- MachineBasicBlock::iterator NextI = llvm::next(MBBI);
- for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
-
- // For now, just assign a fixed set of adjacent registers.
- // This leaves plenty of room for future improvements.
- static const unsigned NEONDRegs[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7
- };
- MO.setReg(NEONDRegs[Offset + R * Stride]);
-
- if (MO.isUse()) {
- // Insert a copy from VirtReg.
- BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg())
- .addReg(VirtReg, getKillRegState(MO.isKill()));
- MO.setIsKill();
- } else if (MO.isDef() && !MO.isDead()) {
- // Add a copy to VirtReg.
- BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
- .addReg(MO.getReg());
- }
- }
- }
-
- return Modified;
-}
-
-bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- MRI = &MF.getRegInfo();
-
- bool Modified = false;
- for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- Modified |= PreAllocNEONRegisters(MBB);
- }
-
- return Modified;
-}
-
-/// createNEONPreAllocPass - returns an instance of the NEON register
-/// pre-allocation pass.
-FunctionPass *llvm::createNEONPreAllocPass() {
- return new NEONPreAllocPass();
-}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
new file mode 100644
index 0000000..233e165
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -0,0 +1,352 @@
+//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has only small immediate offsets for
+  // addressing the stack frame, so a large call frame can cause poor codegen
+  // and may even make it impossible to scavenge a register.
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes) {
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, dl);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned BasePtr = RegInfo->getBaseRegister();
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+
+  // Determine the size of each callee-save spill area and record which frame
+  // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ NumBytes = DPRCSOffset;
+
+  // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (NumBytes > 7)
+      // If the offset is > 7, sp cannot be adjusted in a single instruction,
+      // so try restoring from fp instead.
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ if (NumBytes)
+ // Insert it after all the callee-save spills.
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+
+ if (STI.isTargetELF() && hasFP(MF))
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (RegInfo->hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp. We can assume there's an FP here since hasFP already
+ // checks for hasVarSizedObjects.
+ if (MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ if (MI->getOpcode() == ARM::tRestore &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+ return true;
+ else if (MI->getOpcode() == ARM::tPOP) {
+ // The first two operands are predicates. The last two are
+ // imp-def and imp-use of SP. Check everything in between.
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
+ if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert((MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Thumb1RegisterInfo *RegInfo =
+ static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const Thumb1InstrInfo &TII =
+ *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ } else {
+ // Unwind MBBI to point to first LDR / VLDRD.
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+
+ if (AFI->shouldRestoreSPFromFP()) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot, the target is ELF and the function has FP, or
+ // the target uses var sized objects.
+ if (NumBytes) {
+ assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
+ "No scratch register to restore SP from FP!");
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo, dl);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4);
+ } else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ }
+ }
+
+ if (VARegSaveSize) {
+ // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ // to LR, and we can't pop the value directly to the PC since
+ // we need to update the SP after popping the value. Therefore, we
+ // pop the old LR into R3 as a temporary.
+
+ // Move back past the callee-saved register restoration
+ while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(ARM::R3, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ // erase the old tBX_RET instruction
+ MBB.erase(MBBI);
+ }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(MIB);
+
+ bool NumRegs = false;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MI = MBB.erase(MI);
+ }
+ MIB.addReg(Reg, getDefRegState(true));
+ NumRegs = true;
+ }
+
+  // It's illegal to emit a pop instruction without operands.
+ if (NumRegs)
+ MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
+
+ return true;
+}
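For reference, the threshold in Thumb1FrameLowering::hasReservedCallFrame above follows from the Thumb1 encoding: an sp-relative add/sub takes an 8-bit immediate that is implicitly scaled by 4, so the largest single adjustment is 255 * 4 = 1020 bytes, and the call frame is only included as part of the stack frame when it stays below half of that range, i.e. ((1 << 8) - 1) * 4 / 2 = 510 bytes.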
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 0000000..c592e12
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 declaration of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index af630ac..3fbb433 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -71,8 +71,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOStore, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
@@ -99,85 +100,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOLoad, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
-
-bool Thumb1InstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
- AddDefaultPred(MIB);
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- bool isKill = true;
-
- // Add the callee-saved register as live-in unless it's LR and
- // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
- // then it's already added to the function and entry block live-in sets.
- if (Reg == ARM::LR) {
- MachineFunction &MF = *MBB.getParent();
- if (MF.getFrameInfo()->isReturnAddressTaken() &&
- MF.getRegInfo().isLiveIn(Reg))
- isKill = false;
- }
-
- if (isKill)
- MBB.addLiveIn(Reg);
-
- MIB.addReg(Reg, getKillRegState(isKill));
- }
- return true;
-}
-
-bool Thumb1InstrInfo::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (CSI.empty())
- return false;
-
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
- DebugLoc DL = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP));
- AddDefaultPred(MIB);
-
- bool NumRegs = false;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- if (Reg == ARM::LR) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- Reg = ARM::PC;
- (*MIB).setDesc(get(ARM::tPOP_RET));
- MI = MBB.erase(MI);
- }
- MIB.addReg(Reg, getDefRegState(true));
- NumRegs = true;
- }
-
- // It's illegal to emit pop instruction without operands.
- if (NumRegs)
- MBB.insert(MI, &*MIB);
- else
- MF.DeleteMachineInstr(MIB);
-
- return true;
-}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 555135a..17ef2f7 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -37,28 +37,19 @@ public:
///
const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a21a3da..f62a13e 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -29,7 +29,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -63,24 +63,11 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRcp))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
.addReg(DestReg, getDefRegState(true), SubIdx)
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
- // It's not always a good idea to include the call frame as part of the
- // stack frame. ARM (especially Thumb) has small immediate offset to
- // address the stack frame. So a large call frame can cause poor codegen
- // and may even makes it impossible to scavenge a register.
- if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
- return false;
-
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -92,7 +79,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg,
int NumBytes, bool CanChangeCC,
const TargetInstrInfo &TII,
- const Thumb1RegisterInfo& MRI,
+ const ARMBaseRegisterInfo& MRI,
DebugLoc dl) {
MachineFunction &MF = *MBB.getParent();
bool isHigh = !isARMLowRegister(DestReg) ||
@@ -162,13 +149,12 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code.
-static
-void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned DestReg, unsigned BaseReg,
- int NumBytes, const TargetInstrInfo &TII,
- const Thumb1RegisterInfo& MRI,
- DebugLoc dl) {
+void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMBaseRegisterInfo& MRI,
+ DebugLoc dl) {
bool isSub = NumBytes < 0;
unsigned Bytes = (unsigned)NumBytes;
if (isSub) Bytes = -NumBytes;
@@ -304,7 +290,9 @@ static void emitSPUpdate(MachineBasicBlock &MBB,
void Thumb1RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
// ADJCALLSTACKUP -> add, sp, sp, amount
@@ -315,7 +303,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = TFI->getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
// Replace the pseudo instruction with a new instruction...
@@ -363,6 +351,22 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
MI.RemoveOperand(Op);
}
+/// convertToNonSPOpcode - Change the opcode to the non-SP version, because
+/// we're replacing the frame index with a non-SP register.
+static unsigned convertToNonSPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::tLDRspi:
+ case ARM::tRestore: // FIXME: Should this opcode be here?
+ return ARM::tLDRi;
+
+ case ARM::tSTRspi:
+ case ARM::tSpill: // FIXME: Should this opcode be here?
+ return ARM::tSTRi;
+ }
+
+ return Opcode;
+}
+
bool Thumb1RegisterInfo::
rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
@@ -464,55 +468,51 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
}
return true;
} else {
- unsigned ImmIdx = 0;
- int InstrOffs = 0;
- unsigned NumBits = 0;
- unsigned Scale = 1;
- switch (AddrMode) {
- case ARMII::AddrModeT1_s: {
- ImmIdx = FrameRegIdx+1;
- InstrOffs = MI.getOperand(ImmIdx).getImm();
- NumBits = (FrameReg == ARM::SP) ? 8 : 5;
- Scale = 4;
- break;
- }
- default:
+ if (AddrMode != ARMII::AddrModeT1_s)
llvm_unreachable("Unsupported addressing mode!");
- break;
- }
+
+ unsigned ImmIdx = FrameRegIdx + 1;
+ int InstrOffs = MI.getOperand(ImmIdx).getImm();
+ unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ unsigned Scale = 4;
Offset += InstrOffs * Scale;
- assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!");
// Common case: small offset, fits into instruction.
MachineOperand &ImmOp = MI.getOperand(ImmIdx);
int ImmedOffset = Offset / Scale;
unsigned Mask = (1 << NumBits) - 1;
+
if ((unsigned)Offset <= Mask * Scale) {
- // Replace the FrameIndex with sp
+ // Replace the FrameIndex with the frame register (e.g., sp).
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
+
+ // If we're using a register where sp was stored, convert the instruction
+ // to the non-SP version.
+ unsigned NewOpc = convertToNonSPOpcode(Opcode);
+ if (NewOpc != Opcode && FrameReg != ARM::SP)
+ MI.setDesc(TII.get(NewOpc));
+
return true;
}
- bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
- if (AddrMode == ARMII::AddrModeT1_s) {
- // Thumb tLDRspi, tSTRspi. These will change to instructions that use
- // a different base register.
- NumBits = 5;
- Mask = (1 << NumBits) - 1;
- }
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+
// If this is a thumb spill / restore, we will be using a constpool load to
// materialize the offset.
- if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
+ if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) {
ImmOp.ChangeToImmediate(0);
- else {
+ } else {
// Otherwise, it didn't fit. Pull in what we can to simplify the immed.
ImmedOffset = ImmedOffset & Mask;
ImmOp.ChangeToImmediate(ImmedOffset);
- Offset &= ~(Mask*Scale);
+ Offset &= ~(Mask * Scale);
}
}
+
return Offset == 0;
}
@@ -602,7 +602,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
Offset -= AFI->getGPRCalleeSavedArea2Offset();
else if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+ assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
+ "Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
if (!hasBasePointer(MF)) {
@@ -655,13 +656,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
*this, dl);
}
- MI.setDesc(TII.get(ARM::tLDR));
+ MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
if (UseRR)
- // Use [reg, reg] addrmode.
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
- else // tLDR has an extra register operand.
- MI.addOperand(MachineOperand::CreateReg(0, false));
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else if (Desc.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -677,14 +677,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else
emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
*this, dl);
- MI.setDesc(TII.get(ARM::tSTR));
+ MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
- if (UseRR) // Use [reg, reg] addrmode.
- MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
- else // tSTR has an extra register operand.
- MI.addOperand(MachineOperand::CreateReg(0, false));
- } else
+ if (UseRR)
+ // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
+ // register. The offset is already handled in the vreg value.
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
+ } else {
assert(false && "Unexpected opcode!");
+ }
// Add predicate back if it's needed.
if (MI.getDesc().isPredicable()) {
@@ -692,206 +693,3 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
AddDefaultPred(MIB);
}
}
-
-void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
- NumBytes = (NumBytes + 3) & ~3;
- MFI->setStackSize(NumBytes);
-
- // Determine the sizes of each callee-save spill areas and record which frame
- // belongs to which callee-save spill areas.
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
- int FramePtrSpillFI = 0;
-
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize);
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
- return;
- }
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- break;
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
- GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
- GPRCS1Size += 4;
- }
- break;
- default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
- }
- }
-
- if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
- ++MBBI;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
- }
-
- // Adjust FP so it point to the stack slot that contains the previous FP.
- if (hasFP(MF)) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
- AFI->setShouldRestoreSPFromFP(true);
- }
-
- // Determine starting offsets of spill areas.
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
- unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
- unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
- AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
- AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
- AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-
- NumBytes = DPRCSOffset;
- if (NumBytes) {
- // Insert it after all the callee-save spills.
- emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
- }
-
- if (STI.isTargetELF() && hasFP(MF))
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
-
- AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
- AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
- AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-
- // If we need a base pointer, set it up here. It's whatever the value
- // of the stack pointer is at this point. Any variable size objects
- // will be allocated after this, so we can still use the base pointer
- // to reference locals.
- if (hasBasePointer(MF))
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
- if (MI->getOpcode() == ARM::tRestore &&
- MI->getOperand(1).isFI() &&
- isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
- return true;
- else if (MI->getOpcode() == ARM::tPOP) {
- // The first two operands are predicates. The last two are
- // imp-def and imp-use of SP. Check everything in between.
- for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
- if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
- return false;
- return true;
- }
- return false;
-}
-
-void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
- int NumBytes = (int)MFI->getStackSize();
- const unsigned *CSRegs = getCalleeSavedRegs();
-
- if (!AFI->hasStackFrame()) {
- if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
- } else {
- // Unwind MBBI to point to first LDR / VLDRD.
- if (MBBI != MBB.begin()) {
- do
- --MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
- if (!isCSRestore(MBBI, CSRegs))
- ++MBBI;
- }
-
- // Move SP to start of FP callee save spill area.
- NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
- AFI->getGPRCalleeSavedArea2Size() +
- AFI->getDPRCalleeSavedAreaSize());
-
- if (AFI->shouldRestoreSPFromFP()) {
- NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (NumBytes)
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
- TII, *this, dl);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- } else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- prior(MBBI)->getOpcode() == ARM::tPOP) {
- MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes);
- } else
- emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
- }
- }
-
- if (VARegSaveSize) {
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. Therefore, we
- // pop the old LR into R3 as a temporary.
-
- // Move back past the callee-saved register restoration
- while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
- ++MBBI;
- // Epilogue for vararg functions: pop LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
- }
-}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
index c578054..8a87cc5 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,8 +38,6 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -59,9 +57,6 @@ public:
unsigned Reg) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
}
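// The rewriteFrameIndex logic above encodes frame-index offsets in
// AddrModeT1_s form: an 8-bit immediate when the base register is SP, a 5-bit
// immediate otherwise, both scaled by 4 bytes.  A minimal, self-contained
// sketch of that fit check follows; the helper name is hypothetical and the
// arithmetic mirrors the Mask/Scale computation in the pass.
#include <cassert>

bool fitsThumb1LoadStoreOffset(unsigned OffsetBytes, bool BaseIsSP) {
  const unsigned NumBits = BaseIsSP ? 8 : 5;        // imm8 vs. imm5 field
  const unsigned Scale   = 4;                       // immediate counts words
  const unsigned Mask    = (1u << NumBits) - 1;
  // Must be word-aligned and no larger than Mask * Scale
  // (1020 bytes SP-relative, 124 bytes otherwise).
  return (OffsetBytes % Scale) == 0 && OffsetBytes <= Mask * Scale;
}

// Example: a 1020-byte SP-relative offset still encodes; 1024 does not, and
// 128 bytes is already out of range for a non-SP base.
inline void fitsSelfTest() {
  assert(fitsThumb1LoadStoreOffset(1020, /*BaseIsSP=*/true));
  assert(!fitsThumb1LoadStoreOffset(1024, /*BaseIsSP=*/true));
  assert(!fitsThumb1LoadStoreOffset(128, /*BaseIsSP=*/false));
}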
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.cpp
deleted file mode 100644
index 172908d..0000000
--- a/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "Thumb2HazardRecognizer.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-using namespace llvm;
-
-ScheduleHazardRecognizer::HazardType
-Thumb2HazardRecognizer::getHazardType(SUnit *SU) {
- if (ITBlockSize) {
- MachineInstr *MI = SU->getInstr();
- if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
- }
-
- return PostRAHazardRecognizer::getHazardType(SU);
-}
-
-void Thumb2HazardRecognizer::Reset() {
- ITBlockSize = 0;
- PostRAHazardRecognizer::Reset();
-}
-
-void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) {
- MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
- PostRAHazardRecognizer::EmitInstruction(SU);
-}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.h b/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.h
deleted file mode 100644
index 4726658..0000000
--- a/contrib/llvm/lib/Target/ARM/Thumb2HazardRecognizer.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines hazard recognizers for scheduling Thumb2 functions on
-// ARM processors.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef THUMB2HAZARDRECOGNIZER_H
-#define THUMB2HAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
-
-namespace llvm {
-
-class MachineInstr;
-
-class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
-
-public:
- Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
- PostRAHazardRecognizer(ItinData) {}
-
- virtual HazardType getHazardType(SUnit *SU);
- virtual void Reset();
- virtual void EmitInstruction(SUnit *SU);
-};
-
-
-} // end namespace llvm
-
-#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 442f41d..2f67257 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,7 +17,6 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2HazardRecognizer.h"
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,15 +27,10 @@
using namespace llvm;
-static cl::opt<unsigned>
-IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
- cl::desc("Thumb2 if-conversion limit (default 3)"),
- cl::init(3));
-
-static cl::opt<unsigned>
-IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
- cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
- cl::init(3));
+static cl::opt<bool>
+OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
+ cl::desc("Use old-style Thumb2 if-conversion heuristics"),
+ cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
@@ -105,21 +99,6 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
-bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumInstrs) const {
- return NumInstrs && NumInstrs <= IfCvtLimit;
-}
-
-bool Thumb2InstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
- MachineBasicBlock &FMBB, unsigned NumF) const {
- // FIXME: Catch optimization such as:
- // r0 = movne
- // r0 = moveq
- return NumT && NumF &&
- NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
-}
-
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -155,8 +134,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOStore, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
@@ -181,8 +161,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- MachineMemOperand::MOLoad, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
@@ -193,11 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
-ScheduleHazardRecognizer *Thumb2InstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
- return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
-}
-
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 3a9f8b1..f2637d7 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -38,11 +38,6 @@ public:
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
-
- bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
- MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
-
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -70,9 +65,6 @@ public:
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
-
- ScheduleHazardRecognizer *
- CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
};
/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 07dd0be..099b8f7 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0c3962d..cc8f61c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -58,7 +58,7 @@ namespace {
{ ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
{ ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
// Note: immediate scale is 4.
- { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 },
+ { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 },
{ ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 },
{ ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 },
{ ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
@@ -68,9 +68,7 @@ namespace {
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
//{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 },
{ ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 },
- { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 },
- { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 },
{ ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
// FIXME: adr.n immediate offset must be multiple of 4.
//{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 },
@@ -106,26 +104,27 @@ namespace {
// FIXME: Clean this up after splitting each Thumb load / store opcode
// into multiple ones.
- { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 },
- { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
{ ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 },
- { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 },
-
- { ARM::t2LDM, ARM::tLDM, 0, 0, 0, 1, 1, 1,1, 1 },
- { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
- { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 },
// ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
};
class Thumb2SizeReduce : public MachineFunctionPass {
@@ -217,8 +216,8 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
/// Old opcode has an optional def of CPSR.
if (HasCC)
return true;
- // If both old opcode does not implicit CPSR def, then it's not ok since
- // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP.
+ // If old opcode does not implicitly define CPSR, then it's not ok since
+ // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
if (!HasImplicitCPSRDef(MI->getDesc()))
return false;
HasCC = true;
@@ -233,9 +232,10 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
static bool VerifyLowRegs(MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
- bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM ||
- Opc == ARM::t2LDM_UPD);
- bool isLROk = (Opc == ARM::t2STM_UPD);
+ bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA ||
+ Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
+ Opc == ARM::t2LDMDB_UPD);
+ bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -275,29 +275,32 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned Opc = Entry.NarrowOpc1;
unsigned OpNum = 3; // First 'rest' of operands.
uint8_t ImmLimit = Entry.Imm1Limit;
+
switch (Entry.WideOpc) {
default:
llvm_unreachable("Unexpected Thumb2 load / store opcode!");
case ARM::t2LDRi12:
- case ARM::t2STRi12: {
- unsigned BaseReg = MI->getOperand(1).getReg();
- if (BaseReg == ARM::SP) {
+ case ARM::t2STRi12:
+ if (MI->getOperand(1).getReg() == ARM::SP) {
Opc = Entry.NarrowOpc2;
ImmLimit = Entry.Imm2Limit;
HasOffReg = false;
}
+
Scale = 4;
HasImmOffset = true;
+ HasOffReg = false;
break;
- }
case ARM::t2LDRBi12:
case ARM::t2STRBi12:
HasImmOffset = true;
+ HasOffReg = false;
break;
case ARM::t2LDRHi12:
case ARM::t2STRHi12:
Scale = 2;
HasImmOffset = true;
+ HasOffReg = false;
break;
case ARM::t2LDRs:
case ARM::t2LDRBs:
@@ -310,11 +313,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
HasShift = true;
OpNum = 4;
break;
- case ARM::t2LDM: {
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB: {
unsigned BaseReg = MI->getOperand(0).getReg();
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
+ if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA)
return false;
+
// For the non-writeback version (this one), the base register must be
// one of the registers being loaded.
bool isOK = false;
@@ -324,6 +328,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
break;
}
}
+
if (!isOK)
return false;
@@ -331,28 +336,33 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
- case ARM::t2LDM_RET: {
+ case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
return false;
Opc = Entry.NarrowOpc2; // tPOP_RET
- OpNum = 3;
+ OpNum = 2;
isLdStMul = true;
break;
}
- case ARM::t2LDM_UPD:
- case ARM::t2STM_UPD: {
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
OpNum = 0;
+
unsigned BaseReg = MI->getOperand(1).getReg();
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm());
if (BaseReg == ARM::SP &&
- ((Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) ||
- (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db))) {
+ (Entry.WideOpc == ARM::t2LDMIA_UPD ||
+ Entry.WideOpc == ARM::t2STMDB_UPD)) {
Opc = Entry.NarrowOpc2; // tPOP or tPUSH
- OpNum = 3;
- } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) {
+ OpNum = 2;
+ } else if (!isARMLowRegister(BaseReg) ||
+ (Entry.WideOpc != ARM::t2LDMIA_UPD &&
+ Entry.WideOpc != ARM::t2STMIA_UPD)) {
return false;
}
+
isLdStMul = true;
break;
}
@@ -363,6 +373,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (HasShift) {
OffsetReg = MI->getOperand(2).getReg();
OffsetKill = MI->getOperand(2).isKill();
+
if (MI->getOperand(3).getImm())
// Thumb1 addressing mode doesn't support shift.
return false;
@@ -372,23 +383,22 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (HasImmOffset) {
OffsetImm = MI->getOperand(2).getImm();
unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;
- if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
+
+ if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
// Make sure the immediate field fits.
return false;
}
// Add the 16-bit load / store instruction.
- // FIXME: Thumb1 addressing mode encode both immediate and register offset.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
if (!isLdStMul) {
- MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1));
- if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) {
- // tLDRSB and tLDRSH do not have an immediate offset field. On the other
- // hand, it must have an offset register.
- // FIXME: Remove this special case.
- MIB.addImm(OffsetImm/Scale);
- }
+ MIB.addOperand(MI->getOperand(0));
+ MIB.addOperand(MI->getOperand(1));
+
+ if (HasImmOffset)
+ MIB.addImm(OffsetImm / Scale);
+
assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");
if (HasOffReg)
@@ -423,7 +433,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned Opc = MI->getOpcode();
switch (Opc) {
default: break;
- case ARM::t2ADDSri:
+ case ARM::t2ADDSri:
case ARM::t2ADDSrr: {
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) == ARMCC::AL) {
@@ -451,6 +461,25 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (MI->getOperand(1).isImm())
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
break;
+ case ARM::t2CMPrr: {
+ // Try to reduce to the lo-reg only version first. Why there are two
+ // versions of the instruction is a mystery.
+ // It would be nice to just have two entries in the master table that
+ // are prioritized, but the table assumes a unique entry for each
+ // source insn opcode. So for now, we hack a local entry record to use.
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
+ case ARM::t2ADDrSPi: {
+ static const ReduceEntry NarrowEntry =
+ { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+ if (MI->getOperand(0).getReg() == ARM::SP)
+ return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ }
}
return false;
}
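// ReduceLoadStore above rejects a narrowing candidate when its immediate
// offset cannot be expressed in the 16-bit encoding: the table supplies the
// immediate bit width (Imm1Limit/Imm2Limit) and the offset is scaled by the
// access size.  A minimal sketch of that check, with hypothetical parameter
// names; the real pass folds this into its MaxOffset comparison.
bool offsetFitsNarrowEncoding(int OffsetImm, unsigned ImmLimit,
                              unsigned Scale) {
  if (OffsetImm < 0)
    return false;                                  // Thumb1 offsets are unsigned
  const unsigned MaxOffset = ((1u << ImmLimit) - 1) * Scale;
  return (OffsetImm & (Scale - 1)) == 0 &&         // must be scale-aligned
         static_cast<unsigned>(OffsetImm) <= MaxOffset;
}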
diff --git a/contrib/llvm/lib/Target/Alpha/Alpha.h b/contrib/llvm/lib/Target/Alpha/Alpha.h
index 5cf4866..2c359da 100644
--- a/contrib/llvm/lib/Target/Alpha/Alpha.h
+++ b/contrib/llvm/lib/Target/Alpha/Alpha.h
@@ -18,6 +18,13 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+ namespace Alpha {
+ // These describe LDAx
+
+ static const int IMM_LOW = -32768;
+ static const int IMM_HIGH = 32767;
+ static const int IMM_MULT = 65536;
+ }
class AlphaTargetMachine;
class FunctionPass;
diff --git a/contrib/llvm/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
index 5428cb9..46ae286 100644
--- a/contrib/llvm/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaAsmPrinter.cpp
@@ -91,7 +91,7 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
return;
case MachineOperand::MO_Immediate:
- llvm_unreachable("printOp() does not handle immediate values");
+ assert(0 && "printOp() does not handle immediate values");
return;
case MachineOperand::MO_MachineBasicBlock:
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaCodeEmitter.cpp b/contrib/llvm/lib/Target/Alpha/AlphaCodeEmitter.cpp
deleted file mode 100644
index 3aec070..0000000
--- a/contrib/llvm/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ /dev/null
@@ -1,222 +0,0 @@
-//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the pass that transforms the Alpha machine instructions
-// into relocatable machine code.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-emitter"
-#include "AlphaTargetMachine.h"
-#include "AlphaRelocations.h"
-#include "Alpha.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class AlphaCodeEmitter : public MachineFunctionPass {
- JITCodeEmitter &MCE;
- const AlphaInstrInfo *II;
- public:
- static char ID;
-
- AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
- MCE(mce) {}
-
- /// getBinaryCodeForInstr - This function, generated by the
- /// CodeEmitterGenerator using TableGen, produces the binary encoding for
- /// machine instructions.
-
- unsigned getBinaryCodeForInstr(const MachineInstr &MI);
-
- /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-
- unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO);
-
- bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "Alpha Machine Code Emitter";
- }
-
- private:
- void emitBasicBlock(MachineBasicBlock &MBB);
- };
-}
-
-char AlphaCodeEmitter::ID = 0;
-
-
-/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha
-/// code to the specified MCE object.
-
-FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
- JITCodeEmitter &JCE) {
- return new AlphaCodeEmitter(JCE);
-}
-
-bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
- II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
-
- do {
- MCE.startFunction(MF);
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- emitBasicBlock(*I);
- } while (MCE.finishFunction(MF));
-
- return false;
-}
-
-void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
- MCE.StartMachineBasicBlock(&MBB);
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- const MachineInstr &MI = *I;
- MCE.processDebugLoc(MI.getDebugLoc(), true);
- switch(MI.getOpcode()) {
- default:
- MCE.emitWordLE(getBinaryCodeForInstr(*I));
- break;
- case Alpha::ALTENT:
- case Alpha::PCLABEL:
- case Alpha::MEMLABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- break; //skip these
- }
- MCE.processDebugLoc(MI.getDebugLoc(), false);
- }
-}
-
-static unsigned getAlphaRegNumber(unsigned Reg) {
- switch (Reg) {
- case Alpha::R0 : case Alpha::F0 : return 0;
- case Alpha::R1 : case Alpha::F1 : return 1;
- case Alpha::R2 : case Alpha::F2 : return 2;
- case Alpha::R3 : case Alpha::F3 : return 3;
- case Alpha::R4 : case Alpha::F4 : return 4;
- case Alpha::R5 : case Alpha::F5 : return 5;
- case Alpha::R6 : case Alpha::F6 : return 6;
- case Alpha::R7 : case Alpha::F7 : return 7;
- case Alpha::R8 : case Alpha::F8 : return 8;
- case Alpha::R9 : case Alpha::F9 : return 9;
- case Alpha::R10 : case Alpha::F10 : return 10;
- case Alpha::R11 : case Alpha::F11 : return 11;
- case Alpha::R12 : case Alpha::F12 : return 12;
- case Alpha::R13 : case Alpha::F13 : return 13;
- case Alpha::R14 : case Alpha::F14 : return 14;
- case Alpha::R15 : case Alpha::F15 : return 15;
- case Alpha::R16 : case Alpha::F16 : return 16;
- case Alpha::R17 : case Alpha::F17 : return 17;
- case Alpha::R18 : case Alpha::F18 : return 18;
- case Alpha::R19 : case Alpha::F19 : return 19;
- case Alpha::R20 : case Alpha::F20 : return 20;
- case Alpha::R21 : case Alpha::F21 : return 21;
- case Alpha::R22 : case Alpha::F22 : return 22;
- case Alpha::R23 : case Alpha::F23 : return 23;
- case Alpha::R24 : case Alpha::F24 : return 24;
- case Alpha::R25 : case Alpha::F25 : return 25;
- case Alpha::R26 : case Alpha::F26 : return 26;
- case Alpha::R27 : case Alpha::F27 : return 27;
- case Alpha::R28 : case Alpha::F28 : return 28;
- case Alpha::R29 : case Alpha::F29 : return 29;
- case Alpha::R30 : case Alpha::F30 : return 30;
- case Alpha::R31 : case Alpha::F31 : return 31;
- default:
- llvm_unreachable("Unhandled reg");
- }
-}
-
-unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) {
-
- unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
- // or things that get fixed up later by the JIT.
-
- if (MO.isReg()) {
- rv = getAlphaRegNumber(MO.getReg());
- } else if (MO.isImm()) {
- rv = MO.getImm();
- } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
- DEBUG(errs() << MO << " is a relocated op for " << MI << "\n");
- unsigned Reloc = 0;
- int Offset = 0;
- bool useGOT = false;
- switch (MI.getOpcode()) {
- case Alpha::BSR:
- Reloc = Alpha::reloc_bsr;
- break;
- case Alpha::LDLr:
- case Alpha::LDQr:
- case Alpha::LDBUr:
- case Alpha::LDWUr:
- case Alpha::LDSr:
- case Alpha::LDTr:
- case Alpha::LDAr:
- case Alpha::STQr:
- case Alpha::STLr:
- case Alpha::STWr:
- case Alpha::STBr:
- case Alpha::STSr:
- case Alpha::STTr:
- Reloc = Alpha::reloc_gprellow;
- break;
- case Alpha::LDAHr:
- Reloc = Alpha::reloc_gprelhigh;
- break;
- case Alpha::LDQl:
- Reloc = Alpha::reloc_literal;
- useGOT = true;
- break;
- case Alpha::LDAg:
- case Alpha::LDAHg:
- Reloc = Alpha::reloc_gpdist;
- Offset = MI.getOperand(3).getImm();
- break;
- default:
- llvm_unreachable("unknown relocatable instruction");
- }
- if (MO.isGlobal())
- MCE.addRelocation(MachineRelocation::getGV(
- MCE.getCurrentPCOffset(),
- Reloc,
- const_cast<GlobalValue *>(MO.getGlobal()),
- Offset,
- isa<Function>(MO.getGlobal()),
- useGOT));
- else if (MO.isSymbol())
- MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, MO.getSymbolName(),
- Offset, true));
- else
- MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, MO.getIndex(), Offset));
- } else if (MO.isMBB()) {
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Alpha::reloc_bsr, MO.getMBB()));
- } else {
-#ifndef NDEBUG
- errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
-#endif
- llvm_unreachable(0);
- }
-
- return rv;
-}
-
-#include "AlphaGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
new file mode 100644
index 0000000..690cd1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.cpp
@@ -0,0 +1,143 @@
+//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaFrameLowering.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/Twine.h"
+
+using namespace llvm;
+
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l) {
+ long h = getUpper16(l);
+ return l - h * Alpha::IMM_MULT;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
+ bool FP = hasFP(MF);
+
+ // Handle GP offset
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
+
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(MF.getFunction());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
+ if (NumBytes >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+
+ // Now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ // This must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
+ }
+ }
+}
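// The prologue and epilogue above materialize stack adjustments that exceed
// LDA's signed 16-bit displacement as an LDAH/LDA pair, using getUpper16 and
// getLower16 to split the value (LDAH scales its immediate by
// Alpha::IMM_MULT = 65536).  A standalone arithmetic sketch of that split,
// duplicating the two helpers and constants so it compiles on its own.
#include <cassert>

namespace {
const long IMM_HIGH = 32767;     // Alpha::IMM_HIGH
const long IMM_MULT = 65536;     // Alpha::IMM_MULT

long upper16(long l) {
  long y = l / IMM_MULT;
  if (l % IMM_MULT > IMM_HIGH)   // round up so the low half stays in range
    ++y;
  return y;
}
long lower16(long l) { return l - upper16(l) * IMM_MULT; }
} // namespace

int main() {
  // A 65540-byte frame: sp -= 65540 becomes LDAH sp, -1(sp); LDA sp, -4(sp).
  long NumBytes = -65540;
  assert(upper16(NumBytes) == -1 && lower16(NumBytes) == -4);
  // The pair always reconstructs the original displacement exactly.
  assert(upper16(NumBytes) * IMM_MULT + lower16(NumBytes) == NumBytes);
  return 0;
}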
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
new file mode 100644
index 0000000..ebd9e1b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Alpha/AlphaFrameLowering.h
@@ -0,0 +1,43 @@
+//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Alpha.h"
+#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class AlphaSubtarget;
+
+class AlphaFrameLowering : public TargetFrameLowering {
+ const AlphaSubtarget &STI;
+ // FIXME: This should end in MachineFunctionInfo, not here!
+ mutable int curgpdist;
+public:
+ explicit AlphaFrameLowering(const AlphaSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d197bd1..7b91fea 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -130,19 +130,6 @@ namespace {
return (x - y) == r;
}
- static bool isFPZ(SDValue N) {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- return (CN && (CN->getValueAPF().isZero()));
- }
- static bool isFPZn(SDValue N) {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- return (CN && CN->getValueAPF().isNegZero());
- }
- static bool isFPZp(SDValue N) {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- return (CN && CN->getValueAPF().isPosZero());
- }
-
public:
explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
: SelectionDAGISel(TM)
@@ -253,7 +240,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
Chain.getValue(1));
SDNode *CNode =
- CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag,
+ CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
Chain, Chain.getValue(1));
Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
SDValue(CNode, 1));
@@ -416,13 +403,13 @@ void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
InFlag = Chain.getValue(1);
Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
- MVT::Flag, Addr.getOperand(0),
+ MVT::Glue, Addr.getOperand(0),
Chain, InFlag), 0);
} else {
Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
InFlag = Chain.getValue(1);
Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
- MVT::Flag, Chain, InFlag), 0);
+ MVT::Glue, Chain, InFlag), 0);
}
InFlag = Chain.getValue(1);
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
index ea78bf3..9137d65 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -124,7 +125,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::f32, Promote);
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
+ setOperationAction(ISD::BITCAST, MVT::f32, Promote);
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
@@ -284,8 +285,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
DAG.getIntPtrConstant(VA.getLocMemOffset()));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), 0,
- false, false, 0));
+ MachinePointerInfo(),false, false, 0));
}
}
@@ -306,7 +306,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
@@ -431,7 +431,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, 0);
}
InVals.push_back(ArgVal);
@@ -448,7 +448,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
if (i == 0) FuncInfo->setVarArgsBase(FI);
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
false, false, 0));
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
@@ -456,7 +456,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
+ LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
false, false, 0));
}
@@ -537,12 +537,14 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
DebugLoc dl = N->getDebugLoc();
- SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0,
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
+ MachinePointerInfo(VAListS),
false, false, 0);
SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
- SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1),
- Tmp, NULL, 0, MVT::i32, false, false, 0);
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
if (N->getValueType(0).isFloatingPoint())
{
@@ -556,7 +558,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
DAG.getConstant(8, MVT::i64));
- Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
+ MachinePointerInfo(),
MVT::i32, false, false, 0);
}
@@ -613,7 +616,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
"Unhandled SINT_TO_FP type in custom expander!");
SDValue LD;
bool isDouble = Op.getValueType() == MVT::f64;
- LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0));
+ LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
isDouble?MVT::f64:MVT::f32, LD);
return FP;
@@ -627,7 +630,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
}
case ISD::ConstantPool: {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
@@ -645,11 +648,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
case ISD::GlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
GSDN->getOffset());
// FIXME there isn't really any debug info here
- // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
// && !GV->hasLinkOnceLinkage()) {
if (GV->hasLocalLinkage()) {
SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
@@ -706,10 +709,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
SDValue Result;
if (Op.getValueType() == MVT::i32)
- Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr,
- NULL, 0, MVT::i32, false, false, 0);
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ MachinePointerInfo(), MVT::i32, false, false, 0);
else
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
+ MachinePointerInfo(),
false, false, 0);
return Result;
}
@@ -720,17 +724,20 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0,
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
+ MachinePointerInfo(SrcS),
false, false, 0);
- SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0,
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
+ MachinePointerInfo(DestS),
false, false, 0);
SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result,
- NP, NULL,0, MVT::i32, false, false, 0);
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, MachinePointerInfo(), MVT::i32, false, false, 0);
SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32,
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
+ MachinePointerInfo(), MVT::i32,
false, false, 0);
}
case ISD::VASTART: {
@@ -743,14 +750,15 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
// vastart stores the address of the VarArgsBase and VarArgsOffset
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
- SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0,
- false, false, 0);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
+ MachinePointerInfo(VAListS), false, false, 0);
SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
return DAG.getTruncStore(S1, dl,
DAG.getConstant(FuncInfo->getVarArgsOffset(),
MVT::i64),
- SA2, NULL, 0, MVT::i32, false, false, 0);
+ SA2, MachinePointerInfo(),
+ MVT::i32, false, false, 0);
}
case ISD::RETURNADDR:
return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
@@ -771,7 +779,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Chain, DataPtr;
LowerVAARG(N, Chain, DataPtr, DAG);
- SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0,
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
+ MachinePointerInfo(),
false, false, 0);
Results.push_back(Res);
Results.push_back(SDValue(Res.getNode(), 1));
@@ -795,6 +804,30 @@ AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AlphaTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'f':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::vector<unsigned> AlphaTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
index 46e0c7d..b429e9f 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaISelLowering.h
@@ -87,6 +87,11 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &Constraint) const;
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
index 92de78a..099d715 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/contrib/llvm/lib/Target/Alpha/AlphaInstrInfo.td
@@ -27,7 +27,7 @@ def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
// These are target-independent nodes, but have target-specific formats.
def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
@@ -35,9 +35,9 @@ def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
SDTCisVT<1, i64> ]>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//********************
 //Patterns for matching
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.cpp
deleted file mode 100644
index 12685ed..0000000
--- a/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the JIT interfaces for the Alpha target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jit"
-#include "AlphaJITInfo.h"
-#include "AlphaRelocations.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
-using namespace llvm;
-
-#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
- ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
-#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
- ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
-
-#define BUILD_LDA(RD, RS, IMM16) \
- ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-#define BUILD_LDAH(RD, RS, IMM16) \
- ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
-
-#define BUILD_LDQ(RD, RS, IMM16) \
- ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
-
-#define BUILD_JMP(RD, RS, IMM16) \
- ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
-#define BUILD_JSR(RD, RS, IMM16) \
- ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
-
-#define BUILD_SLLi(RD, RS, IMM8) \
- (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
-
-#define BUILD_ORi(RD, RS, IMM8) \
- (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
-
-#define BUILD_OR(RD, RS, RT) \
- (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
-
-
-
-static void EmitBranchToAt(void *At, void *To) {
- unsigned long Fn = (unsigned long)To;
-
- unsigned *AtI = (unsigned*)At;
-
- AtI[0] = BUILD_OR(0, 27, 27);
-
- DEBUG(errs() << "Stub targeting " << To << "\n");
-
- for (int x = 1; x <= 8; ++x) {
- AtI[2*x - 1] = BUILD_SLLi(27,27,8);
- unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
- //DEBUG(errs() << "outputing " << hex << d << dec << "\n");
- AtI[2*x] = BUILD_ORi(27, 27, d);
- }
- AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
- AtI[18] = 0x00FFFFFF; //mark this as a stub
-}
-
-void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- //FIXME
- llvm_unreachable(0);
-}
-
-static TargetJITInfo::JITCompilerFn JITCompilerFunction;
-//static AlphaJITInfo* AlphaJTI;
-
-extern "C" {
-#ifdef __alpha
-
- void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
- {
- void* Target = JITCompilerFunction(CameFromStub);
-
- //rewrite the stub to an unconditional branch
- if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
- DEBUG(errs() << "Came from a stub, rewriting\n");
- EmitBranchToAt(CameFromStub, Target);
- } else {
- DEBUG(errs() << "confused, didn't come from stub at " << CameFromStub
- << " old jump vector " << oldpv
- << " new jump vector " << Target << "\n");
- }
-
- //Change pv to new Target
- *oldpv = (long)Target;
- }
-
- void AlphaCompilationCallback(void);
-
- asm(
- ".text\n"
- ".globl AlphaCompilationCallbackC\n"
- ".align 4\n"
- ".globl AlphaCompilationCallback\n"
- ".ent AlphaCompilationCallback\n"
-"AlphaCompilationCallback:\n"
- // //get JIT's GOT
- "ldgp $29, 0($27)\n"
- //Save args, callee saved, and perhaps others?
- //args: $16-$21 $f16-$f21 (12)
- //callee: $9-$14 $f2-$f9 (14)
- //others: fp:$15 ra:$26 pv:$27 (3)
- "lda $30, -232($30)\n"
- "stq $16, 0($30)\n"
- "stq $17, 8($30)\n"
- "stq $18, 16($30)\n"
- "stq $19, 24($30)\n"
- "stq $20, 32($30)\n"
- "stq $21, 40($30)\n"
- "stt $f16, 48($30)\n"
- "stt $f17, 56($30)\n"
- "stt $f18, 64($30)\n"
- "stt $f19, 72($30)\n"
- "stt $f20, 80($30)\n"
- "stt $f21, 88($30)\n"
- "stq $9, 96($30)\n"
- "stq $10, 104($30)\n"
- "stq $11, 112($30)\n"
- "stq $12, 120($30)\n"
- "stq $13, 128($30)\n"
- "stq $14, 136($30)\n"
- "stt $f2, 144($30)\n"
- "stt $f3, 152($30)\n"
- "stt $f4, 160($30)\n"
- "stt $f5, 168($30)\n"
- "stt $f6, 176($30)\n"
- "stt $f7, 184($30)\n"
- "stt $f8, 192($30)\n"
- "stt $f9, 200($30)\n"
- "stq $15, 208($30)\n"
- "stq $26, 216($30)\n"
- "stq $27, 224($30)\n"
-
- "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
- "bis $0, $0, $17\n" //pass the roughly stub addr in second arg
- "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
-
- "ldq $16, 0($30)\n"
- "ldq $17, 8($30)\n"
- "ldq $18, 16($30)\n"
- "ldq $19, 24($30)\n"
- "ldq $20, 32($30)\n"
- "ldq $21, 40($30)\n"
- "ldt $f16, 48($30)\n"
- "ldt $f17, 56($30)\n"
- "ldt $f18, 64($30)\n"
- "ldt $f19, 72($30)\n"
- "ldt $f20, 80($30)\n"
- "ldt $f21, 88($30)\n"
- "ldq $9, 96($30)\n"
- "ldq $10, 104($30)\n"
- "ldq $11, 112($30)\n"
- "ldq $12, 120($30)\n"
- "ldq $13, 128($30)\n"
- "ldq $14, 136($30)\n"
- "ldt $f2, 144($30)\n"
- "ldt $f3, 152($30)\n"
- "ldt $f4, 160($30)\n"
- "ldt $f5, 168($30)\n"
- "ldt $f6, 176($30)\n"
- "ldt $f7, 184($30)\n"
- "ldt $f8, 192($30)\n"
- "ldt $f9, 200($30)\n"
- "ldq $15, 208($30)\n"
- "ldq $26, 216($30)\n"
- "ldq $27, 224($30)\n" //this was updated in the callback with the target
-
- "lda $30, 232($30)\n" //restore sp
- "jmp $31, ($27)\n" //jump to the new function
- ".end AlphaCompilationCallback\n"
- );
-#else
- void AlphaCompilationCallback() {
- llvm_unreachable("Cannot call AlphaCompilationCallback() on a non-Alpha arch!");
- }
-#endif
-}
-
-TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() {
- // The stub contains 19 4-byte instructions, aligned at 4 bytes:
- // R0 = R27
- // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions
- // JMP R27
- // Magic number so the compilation callback can recognize the stub.
- StubLayout Result = {19 * 4, 4};
- return Result;
-}
-
-void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) {
- //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
- //Do things in a stupid slow way!
- void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
- for (int x = 0; x < 19; ++ x)
- JCE.emitWordLE(0);
- EmitBranchToAt(Addr, Fn);
- DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
- return Addr;
-}
-
-TargetJITInfo::LazyResolverFn
-AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
- JITCompilerFunction = F;
- // setZerothGOTEntry((void*)AlphaCompilationCallback);
- return AlphaCompilationCallback;
-}
-
-//These describe LDAx
-static const int IMM_LOW = -32768;
-static const int IMM_HIGH = 32767;
-static const int IMM_MULT = 65536;
-
-static long getUpper16(long l)
-{
- long y = l / IMM_MULT;
- if (l % IMM_MULT > IMM_HIGH)
- ++y;
- if (l % IMM_MULT < IMM_LOW)
- --y;
- assert((short)y == y && "displacement out of range");
- return y;
-}
-
-static long getLower16(long l)
-{
- long h = getUpper16(l);
- long y = l - h * IMM_MULT;
- assert(y == (short)y && "Displacement out of range");
- return y;
-}
-
-void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) {
- for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
- unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
- long idx = 0;
- bool doCommon = true;
- switch ((Alpha::RelocationType)MR->getRelocationType()) {
- default: llvm_unreachable("Unknown relocation type!");
- case Alpha::reloc_literal:
- //This is a LDQl
- idx = MR->getGOTIndex();
- DEBUG(errs() << "Literal relocation to slot " << idx);
- idx = (idx - GOToffset) * 8;
- DEBUG(errs() << " offset " << idx << "\n");
- break;
- case Alpha::reloc_gprellow:
- idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
- idx = getLower16(idx);
- DEBUG(errs() << "gprellow relocation offset " << idx << "\n");
- DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
- << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
- break;
- case Alpha::reloc_gprelhigh:
- idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
- idx = getUpper16(idx);
- DEBUG(errs() << "gprelhigh relocation offset " << idx << "\n");
- DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
- << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
- break;
- case Alpha::reloc_gpdist:
- switch (*RelocPos >> 26) {
- case 0x09: //LDAH
- idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
- idx = getUpper16(idx);
- DEBUG(errs() << "LDAH: " << idx << "\n");
- //add the relocation to the map
- gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
- break;
- case 0x08: //LDA
- assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
- "LDAg without seeing LDAHg");
- idx = &GOTBase[GOToffset * 8] -
- (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
- idx = getLower16(idx);
- DEBUG(errs() << "LDA: " << idx << "\n");
- break;
- default:
- llvm_unreachable("Cannot handle gpdist yet");
- }
- break;
- case Alpha::reloc_bsr: {
- idx = (((unsigned char*)MR->getResultPointer() -
- (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
- *RelocPos |= (idx & ((1 << 21)-1));
- doCommon = false;
- break;
- }
- }
- if (doCommon) {
- short x = (short)idx;
- assert(x == idx);
- *(short*)RelocPos = x;
- }
- }
-}
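
Editorial note: the stub emitted by the JIT code deleted above rebuilds a 64-bit callee address in R27 one byte at a time — eight rounds of shift-left-by-8 followed by OR-ing in the next byte, most significant byte first — then jumps through R27. A standalone sketch of that arithmetic (plain C++, hypothetical values, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Target = 0x00000123456789ABULL; // hypothetical callee address
      uint64_t Reg = 0xFFFFFFFFFFFFFFFFULL;    // whatever R27 held on entry
      for (int x = 1; x <= 8; ++x) {
        Reg <<= 8;                              // models BUILD_SLLi(27, 27, 8)
        Reg |= (Target >> (64 - 8 * x)) & 0xFF; // models BUILD_ORi(27, 27, d)
      }
      // Eight shifts of 8 bits flush the original contents, so the result is
      // exactly the target address regardless of R27's value on entry.
      assert(Reg == Target);
      return 0;
    }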
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.h
deleted file mode 100644
index bd358a4..0000000
--- a/contrib/llvm/lib/Target/Alpha/AlphaJITInfo.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetJITInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_JITINFO_H
-#define ALPHA_JITINFO_H
-
-#include "llvm/Target/TargetJITInfo.h"
-#include <map>
-
-namespace llvm {
- class TargetMachine;
-
- class AlphaJITInfo : public TargetJITInfo {
- protected:
- TargetMachine &TM;
-
- //because gpdist are paired and relative to the pc of the first inst,
- //we need to have some state
- std::map<std::pair<void*, int>, void*> gpdistmap;
- public:
- explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
- { useGOT = true; }
-
- virtual StubLayout getStubLayout();
- virtual void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE);
- virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
- virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase);
-
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- virtual void replaceMachineCodeForFunction(void *Old, void *New);
- private:
- static const unsigned GOToffset = 4096;
-
- };
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 327ddb4..7667fd8 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -35,29 +35,21 @@
#include <cstdlib>
using namespace llvm;
-//These describe LDAx
-static const int IMM_LOW = -32768;
-static const int IMM_HIGH = 32767;
-static const int IMM_MULT = 65536;
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ TII(tii) {
+}
-static long getUpper16(long l)
-{
- long y = l / IMM_MULT;
- if (l % IMM_MULT > IMM_HIGH)
+static long getUpper16(long l) {
+ long y = l / Alpha::IMM_MULT;
+ if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
++y;
return y;
}
-static long getLower16(long l)
-{
+static long getLower16(long l) {
long h = getUpper16(l);
- return l - h * IMM_MULT;
-}
-
-AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
- TII(tii), curgpdist(0)
-{
+ return l - h * Alpha::IMM_MULT;
}
const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -86,19 +78,12 @@ BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasVarSizedObjects();
-}
-
void AlphaRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (hasFP(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
// If we have a frame pointer, turn the adjcallstackup instruction into a
// 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
// <amt>'
@@ -108,7 +93,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = TFI->getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
MachineInstr *New;
@@ -146,7 +131,9 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- bool FP = hasFP(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ bool FP = TFI->hasFP(MF);
while (!MI.getOperand(i).isFI()) {
++i;
@@ -168,7 +155,7 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DEBUG(errs() << "Corrected Offset " << Offset
<< " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
- if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+ if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
<< Offset << "\n");
//so in this case, we need to use a temporary register, and move the
@@ -186,111 +173,14 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-
-void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
- bool FP = hasFP(MF);
-
- //handle GOP offset
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
- .addGlobalAddress(MF.getFunction())
- .addReg(Alpha::R27).addImm(++curgpdist);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
- .addGlobalAddress(MF.getFunction())
- .addReg(Alpha::R29).addImm(curgpdist);
-
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
- .addGlobalAddress(MF.getFunction());
-
- // Get the number of bytes to allocate from the FrameInfo
- long NumBytes = MFI->getStackSize();
-
- if (FP)
- NumBytes += 8; //reserve space for the old FP
-
- // Do we need to allocate space on the stack?
- if (NumBytes == 0) return;
-
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
- NumBytes = (NumBytes+Align-1)/Align*Align;
-
- // Update frame info to pretend that this is part of the stack...
- MFI->setStackSize(NumBytes);
-
- // adjust stack pointer: r30 -= numbytes
- NumBytes = -NumBytes;
- if (NumBytes >= IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) >= IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
-
- //now if we need to, save the old FP and set the new
- if (FP)
- {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
- .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
- //this must be the last instr in the prolog
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
- .addReg(Alpha::R30).addReg(Alpha::R30);
- }
-
-}
-
-void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert((MBBI->getOpcode() == Alpha::RETDAG ||
- MBBI->getOpcode() == Alpha::RETDAGp)
- && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
-
- bool FP = hasFP(MF);
-
- // Get the number of bytes allocated from the FrameInfo...
- long NumBytes = MFI->getStackSize();
-
- //now if we need to, restore the old FP
- if (FP) {
- //copy the FP into the SP (discards allocas)
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
- .addReg(Alpha::R15);
- //restore the FP
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
- .addImm(0).addReg(Alpha::R15);
- }
-
- if (NumBytes != 0) {
- if (NumBytes <= IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) <= IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
- }
-}
-
unsigned AlphaRegisterInfo::getRARegister() const {
return Alpha::R26;
}
unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? Alpha::R15 : Alpha::R30;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
}
unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
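
Editorial note: the getUpper16/getLower16 helpers kept in AlphaRegisterInfo.cpp above (and duplicated in the deleted JIT code) split a displacement into an LDAH half scaled by 65536 and a sign-extended LDA half, so that upper * 65536 + lower reproduces the original value with lower in [-32768, 32767]. A standalone worked example of that split (plain C++, not LLVM code; the value 100000 is arbitrary):

    #include <cassert>
    #include <cstdio>

    static const int IMM_LOW  = -32768; // LDA/LDAH take signed 16-bit immediates
    static const int IMM_HIGH = 32767;
    static const int IMM_MULT = 65536;

    static long getUpper16(long l) {
      long y = l / IMM_MULT;
      if (l % IMM_MULT > IMM_HIGH) ++y; // keep the low half inside [-32768, 32767]
      if (l % IMM_MULT < IMM_LOW) --y;
      return y;
    }

    static long getLower16(long l) {
      return l - getUpper16(l) * IMM_MULT;
    }

    int main() {
      long Offset = 100000;            // too large for a single 16-bit immediate
      long Hi = getUpper16(Offset);    // 2
      long Lo = getLower16(Offset);    // -31072
      assert(Hi * IMM_MULT + Lo == Offset);
      std::printf("LDAH #%ld; LDA #%ld\n", Hi, Lo);
      return 0;
    }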
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
index b164979..b0d4dd0 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -32,8 +32,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -41,11 +39,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -57,9 +50,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
static std::string getPrettyName(unsigned reg);
-
-private:
- mutable int curgpdist;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
index 4dc04b8..3703dd4 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
+++ b/contrib/llvm/lib/Target/Alpha/AlphaSchedule.td
@@ -50,11 +50,11 @@ def s_ftoi : InstrItinClass;
def s_itof : InstrItinClass;
def s_pseudo : InstrItinClass;
-//Table 2­4 Instruction Class Latency in Cycles
+//Table 2-4 Instruction Class Latency in Cycles
//modified some
def Alpha21264Itineraries : ProcessorItineraries<
- [L0, L1, FST0, FST1, U0, U1, FA, FM], [
+ [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
index fc9be03..b53533b 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "Alpha.h"
-#include "AlphaJITInfo.h"
#include "AlphaMCAsmInfo.h"
#include "AlphaTargetMachine.h"
#include "llvm/PassManager.h"
@@ -29,8 +28,7 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
DataLayout("e-f128:128:128-n64"),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
- JITInfo(*this),
+ FrameLowering(Subtarget),
Subtarget(TT, FS),
TLInfo(*this),
TSInfo(*this) {
@@ -54,9 +52,3 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
PM.add(createAlphaLLRPPass(*this));
return false;
}
-bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- JITCodeEmitter &JCE) {
- PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
- return false;
-}
diff --git a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
index 153944e..26238fb 100644
--- a/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/contrib/llvm/lib/Target/Alpha/AlphaTargetMachine.h
@@ -14,14 +14,14 @@
#ifndef ALPHA_TARGETMACHINE_H
#define ALPHA_TARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "AlphaInstrInfo.h"
-#include "AlphaJITInfo.h"
#include "AlphaISelLowering.h"
+#include "AlphaFrameLowering.h"
#include "AlphaSelectionDAGInfo.h"
#include "AlphaSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -30,8 +30,7 @@ class GlobalValue;
class AlphaTargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
AlphaInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
- AlphaJITInfo JITInfo;
+ AlphaFrameLowering FrameLowering;
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
AlphaSelectionDAGInfo TSInfo;
@@ -41,7 +40,9 @@ public:
const std::string &FS);
virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
virtual const AlphaRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -53,15 +54,10 @@ public:
return &TSInfo;
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual AlphaJITInfo* getJITInfo() {
- return &JITInfo;
- }
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- JITCodeEmitter &JCE);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
index 6ba258b..6ba258b 100644
--- a/contrib/llvm/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
new file mode 100644
index 0000000..08bb952
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.cpp
@@ -0,0 +1,124 @@
+//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinFrameLowering.h"
+#include "BlackfinInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) ||
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers
+void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ if (FrameSize%4) {
+ FrameSize = (FrameSize+3) & ~3;
+ MFI->setStackSize(FrameSize);
+ }
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
+ return;
+ }
+
+ // emit a LINK instruction
+ if (FrameSize <= 0x3ffff) {
+ BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
+ return;
+ }
+
+ // Frame is too big, do a manual LINK:
+ // [--SP] = RETS;
+ // [--SP] = FP;
+ // FP = SP;
+ // P1 = -FrameSize;
+ // SP = SP + P1;
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::RETS, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
+ .addReg(BF::FP, RegState::Kill);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
+ .addReg(BF::SP);
+ RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
+ .addReg(BF::SP, RegState::Kill)
+ .addReg(BF::P1, RegState::Kill);
+
+}
+
+void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const BlackfinInstrInfo &TII =
+ *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ if (!hasFP(MF)) {
+ assert(!MFI->adjustsStack() &&
+ "FP elimination on a non-leaf function is not supported");
+ RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
+ return;
+ }
+
+ // emit an UNLINK instruction
+ BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
+}
+
+void BlackfinFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const BlackfinRegisterInfo *RegInfo =
+ static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const TargetRegisterClass *RC = BF::DPRegisterClass;
+
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
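
Editorial note: the Blackfin prologue above rounds the frame size up to the 4-byte stack alignment with (FrameSize+3) & ~3 and only falls back to the manual PUSH/MOVE/ADD sequence when the frame exceeds the LINK immediate limit of 0x3ffff. A standalone sketch of that rounding (plain C++, sample sizes chosen arbitrarily):

    #include <cassert>

    // Round up to the next multiple of 4, as BlackfinFrameLowering::emitPrologue
    // does before choosing between LINK and the manual link sequence.
    static int alignTo4(int FrameSize) {
      return (FrameSize + 3) & ~3;
    }

    int main() {
      assert(alignTo4(0)  == 0);
      assert(alignTo4(13) == 16);
      assert(alignTo4(16) == 16);
      // A frame this large no longer fits the LINK immediate (0x3ffff).
      assert(alignTo4(0x40001) > 0x3ffff);
      return 0;
    }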
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
new file mode 100644
index 0000000..3d2ee25
--- /dev/null
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinFrameLowering.h
@@ -0,0 +1,46 @@
+//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_FRAMEINFO_H
+#define ALPHA_FRAMEINFO_H
+
+#include "Blackfin.h"
+#include "BlackfinSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class BlackfinSubtarget;
+
+class BlackfinFrameLowering : public TargetFrameLowering {
+protected:
+ const BlackfinSubtarget &STI;
+
+public:
+ explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 80ee107..9df2aee 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -51,8 +51,7 @@ namespace {
private:
SDNode *Select(SDNode *N);
- bool SelectADDRspii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
// Walk the DAG after instruction selection, fixing register class issues.
void FixRegisterClasses(SelectionDAG &DAG);
@@ -94,8 +93,7 @@ SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op,
- SDValue Addr,
+bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
SDValue &Base,
SDValue &Offset) {
FrameIndexSDNode *FIN = 0;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 6e828e1..dd27d0a 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -15,6 +15,7 @@
#include "BlackfinISelLowering.h"
#include "BlackfinTargetMachine.h"
#include "llvm/Function.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -207,7 +208,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned ObjSize = VA.getLocVT().getStoreSize();
int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
false, false, 0));
}
}
@@ -332,8 +334,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue OffsetN = DAG.getIntPtrConstant(Offset);
OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
- PseudoSourceValue::getStack(),
- Offset, false, false, 0));
+ MachinePointerInfo(),false, false, 0));
}
}
@@ -364,7 +365,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
std::vector<EVT> NodeTys;
NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
SDValue Ops[] = { Chain, Callee, InFlag };
Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
InFlag.getNode() ? 3 : 2);
@@ -431,7 +432,7 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
SDValue(CarryIn, 0));
// Add operands, produce sum and carry flag
- SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
Op.getOperand(0), Op.getOperand(1));
// Store intermediate carry from Sum
@@ -439,11 +440,11 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
/* flag= */ SDValue(Sum, 1));
// Add incoming carry, again producing an output flag
- Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
SDValue(Sum, 0), SDValue(CarryIn, 0));
// Update AC0 with the intermediate carry, producing a flag.
- SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Flag,
+ SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
SDValue(Carry1, 0));
// Compose (i32, flag) pair
@@ -549,6 +550,52 @@ BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+BlackfinTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ // Blackfin-specific constraints
+ case 'a':
+ case 'd':
+ case 'z':
+ case 'D':
+ case 'W':
+ case 'e':
+ case 'b':
+ case 'v':
+ case 'f':
+ case 'c':
+ case 't':
+ case 'u':
+ case 'k':
+ case 'x':
+ case 'y':
+ case 'w':
+ return CW_Register;
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'Z':
+ case 'Y':
+ return CW_SpecificReg;
+ }
+ return weight;
+}
+
/// getRegForInlineAsmConstraint - Return register no and class for a C_Register
/// constraint.
std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
index 6bebcc3..15a745f 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -39,6 +39,12 @@ namespace llvm {
SelectionDAG &DAG) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
std::vector<unsigned>
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
index 8034a7f..5b59d77 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -23,17 +23,17 @@ def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index a518312..b4a9b84 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -50,6 +50,8 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector
BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
using namespace BF;
BitVector Reserved(getNumRegs());
Reserved.set(AZ);
@@ -70,20 +72,11 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(L3);
Reserved.set(SP);
Reserved.set(RETS);
- if (hasFP(MF))
+ if (TFI->hasFP(MF))
Reserved.set(FP);
return Reserved;
}
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) ||
- MFI->adjustsStack() || MFI->hasVarSizedObjects();
-}
-
bool BlackfinRegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
return true;
@@ -161,7 +154,9 @@ void BlackfinRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
int64_t Amount = I->getOperand(0).getImm();
if (Amount != 0) {
assert(Amount%4 == 0 && "Unaligned call frame size");
@@ -196,6 +191,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
unsigned FIPos;
@@ -208,7 +204,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
+ MI.getOperand(FIPos+1).getImm();
unsigned BaseReg = BF::FP;
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
} else {
BaseReg = BF::SP;
@@ -329,93 +325,15 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void BlackfinRegisterInfo::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = BF::DPRegisterClass;
- if (requiresRegisterScavenging(MF)) {
- // Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
-}
-
-// Emit a prologue that sets up a stack frame.
-// On function entry, R0-R2 and P0 may hold arguments.
-// R3, P1, and P2 may be used as scratch registers
-void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- int FrameSize = MFI->getStackSize();
- if (FrameSize%4) {
- FrameSize = (FrameSize+3) & ~3;
- MFI->setStackSize(FrameSize);
- }
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
- return;
- }
-
- // emit a LINK instruction
- if (FrameSize <= 0x3ffff) {
- BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
- return;
- }
-
- // Frame is too big, do a manual LINK:
- // [--SP] = RETS;
- // [--SP] = FP;
- // FP = SP;
- // P1 = -FrameSize;
- // SP = SP + P1;
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::RETS, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::FP, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
- .addReg(BF::SP);
- loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
- .addReg(BF::SP, RegState::Kill)
- .addReg(BF::P1, RegState::Kill);
-
-}
-
-void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- DebugLoc dl = MBBI->getDebugLoc();
-
- int FrameSize = MFI->getStackSize();
- assert(FrameSize%4 == 0 && "Misaligned frame size");
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
- return;
- }
-
- // emit an UNLINK instruction
- BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
-}
-
unsigned BlackfinRegisterInfo::getRARegister() const {
return BF::RETS;
}
unsigned
BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? BF::FP : BF::SP;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? BF::FP : BF::SP;
}
unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
index bb83c34..642b8ad 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -41,8 +41,6 @@ namespace llvm {
return &BF::PRegClass;
}
- bool hasFP(const MachineFunction &MF) const;
-
// bool hasReservedCallFrame(MachineFunction &MF) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;
@@ -54,12 +52,6 @@ namespace llvm {
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getRARegister() const;
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
index e1cfae9..f5dd439 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -252,9 +252,9 @@ def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
PClass::iterator
PClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
return allocation_order_begin(MF)
- + (RI->hasFP(MF) ? 7 : 6);
+ + (TFI->hasFP(MF) ? 7 : 6);
}
}];
}
@@ -275,9 +275,9 @@ def DP : RegisterClass<"BF", [i32], 32,
DPClass::iterator
DPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
return allocation_order_begin(MF)
- + (RI->hasFP(MF) ? 15 : 14);
+ + (TFI->hasFP(MF) ? 15 : 14);
}
}];
}
@@ -295,9 +295,9 @@ def GR : RegisterClass<"BF", [i32], 32,
GRClass::iterator
GRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
return allocation_order_begin(MF)
- + (RI->hasFP(MF) ? 31 : 30);
+ + (TFI->hasFP(MF) ? 31 : 30);
}
}];
}
@@ -318,9 +318,9 @@ def ALL : RegisterClass<"BF", [i32], 32,
ALLClass::iterator
ALLClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
return allocation_order_begin(MF)
- + (RI->hasFP(MF) ? 31 : 30);
+ + (TFI->hasFP(MF) ? 31 : 30);
}
}];
}
@@ -334,9 +334,9 @@ def PI : RegisterClass<"BF", [i32], 32,
PIClass::iterator
PIClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
return allocation_order_begin(MF)
- + (RI->hasFP(MF) ? 11 : 10);
+ + (TFI->hasFP(MF) ? 11 : 10);
}
}];
}
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 66a2f68..e11920f 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -33,7 +33,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
TLInfo(*this),
TSInfo(*this),
InstrInfo(Subtarget),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
+ FrameLowering(Subtarget) {
}
bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
index a63aa54..29b2b17 100644
--- a/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/contrib/llvm/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -14,14 +14,15 @@
#ifndef BLACKFINTARGETMACHINE_H
#define BLACKFINTARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "BlackfinInstrInfo.h"
-#include "BlackfinSubtarget.h"
+#include "BlackfinIntrinsicInfo.h"
#include "BlackfinISelLowering.h"
+#include "BlackfinFrameLowering.h"
+#include "BlackfinSubtarget.h"
#include "BlackfinSelectionDAGInfo.h"
-#include "BlackfinIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -31,14 +32,16 @@ namespace llvm {
BlackfinTargetLowering TLInfo;
BlackfinSelectionDAGInfo TSInfo;
BlackfinInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
+ BlackfinFrameLowering FrameLowering;
BlackfinIntrinsicInfo IntrinsicInfo;
public:
BlackfinTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const BlackfinSubtarget *getSubtargetImpl() const {
return &Subtarget;
}
diff --git a/contrib/llvm/lib/Target/CBackend/CBackend.cpp b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
index 270fff6..6c555a3 100644
--- a/contrib/llvm/lib/Target/CBackend/CBackend.cpp
+++ b/contrib/llvm/lib/Target/CBackend/CBackend.cpp
@@ -47,12 +47,16 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
+// Some MS header decided to define setjmp as _setjmp; undo this for this file.
+#ifdef _MSC_VER
+#undef setjmp
+#endif
using namespace llvm;
-extern "C" void LLVMInitializeCBackendTarget() {
+extern "C" void LLVMInitializeCBackendTarget() {
// Register the target.
RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
}
@@ -72,8 +76,10 @@ namespace {
class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
public:
static char ID;
- CBackendNameAllUsedStructsAndMergeFunctions()
- : ModulePass(ID) {}
+ CBackendNameAllUsedStructsAndMergeFunctions()
+ : ModulePass(ID) {
+ initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<FindUsedTypes>();
}
@@ -110,9 +116,10 @@ namespace {
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
NextAnonValueNumber(0) {
+ initializeLoopInfoPass(*PassRegistry::getPassRegistry());
FPCounter = 0;
}
@@ -183,7 +190,7 @@ namespace {
Out << ")";
}
}
-
+
void writeOperand(Value *Operand, bool Static = false);
void writeInstComputationInline(Instruction &I);
void writeOperandInternal(Value *Operand, bool Static = false);
@@ -224,7 +231,7 @@ namespace {
return ByValParams.count(A);
return isa<GlobalVariable>(V) || isDirectAlloca(V);
}
-
+
// isInlinableInst - Attempt to inline instructions into their uses to build
// trees as much as possible. To do this, we have to consistently decide
// what is acceptable to inline, so that variable declarations don't get
@@ -233,7 +240,7 @@ namespace {
static bool isInlinableInst(const Instruction &I) {
// Always inline cmp instructions, even if they are shared by multiple
// expressions. GCC generates horrible code if we don't.
- if (isa<CmpInst>(I))
+ if (isa<CmpInst>(I))
return true;
// Must be an expression, must be used exactly once. If it is dead, we
@@ -270,14 +277,14 @@ namespace {
return 0;
return AI;
}
-
+
// isInlineAsm - Check if the instruction is a call to an inline asm chunk
static bool isInlineAsm(const Instruction& I) {
if (const CallInst *CI = dyn_cast<CallInst>(&I))
return isa<InlineAsm>(CI->getCalledValue());
return false;
}
-
+
// Instruction visitation functions
friend class InstVisitor<CWriter>;
@@ -310,7 +317,7 @@ namespace {
void visitStoreInst (StoreInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitVAArgInst (VAArgInst &I);
-
+
void visitInsertElementInst(InsertElementInst &I);
void visitExtractElementInst(ExtractElementInst &I);
void visitShuffleVectorInst(ShuffleVectorInst &SVI);
@@ -346,7 +353,7 @@ char CWriter::ID = 0;
static std::string CBEMangle(const std::string &S) {
std::string Result;
-
+
for (unsigned i = 0, e = S.size(); i != e; ++i)
if (isalnum(S[i]) || S[i] == '_') {
Result += S[i];
@@ -375,7 +382,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
TI != TE; ) {
TypeSymbolTable::iterator I = TI++;
-
+
// If this isn't a struct or array type, remove it from our set of types
// to name. This simplifies emission later.
if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
@@ -403,8 +410,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
++RenameCounter;
Changed = true;
}
-
-
+
+
// Loop over all external functions and globals. If we have two with
// identical names, merge them.
// FIXME: This code should disappear when we don't allow values with the same
@@ -440,7 +447,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
}
}
}
-
+
return Changed;
}
@@ -479,20 +486,20 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
FunctionInnards << "void";
}
FunctionInnards << ')';
- printType(Out, RetTy,
+ printType(Out, RetTy,
/*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
}
raw_ostream &
CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
"Invalid type for printSimpleType");
switch (Ty->getTypeID()) {
case Type::VoidTyID: return Out << "void " << NameSoFar;
case Type::IntegerTyID: {
unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
+ if (NumBits == 1)
return Out << "bool " << NameSoFar;
else if (NumBits <= 8)
return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
@@ -502,7 +509,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
else if (NumBits <= 64)
return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
- else {
+ else {
assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
}
@@ -514,14 +521,18 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
case Type::X86_FP80TyID:
case Type::PPC_FP128TyID:
case Type::FP128TyID: return Out << "long double " << NameSoFar;
-
+
+ case Type::X86_MMXTyID:
+ return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
+ " __attribute__((vector_size(64))) " + NameSoFar);
+
case Type::VectorTyID: {
const VectorType *VTy = cast<VectorType>(Ty);
return printSimpleType(Out, VTy->getElementType(), isSigned,
" __attribute__((vector_size(" +
utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
}
-
+
default:
#ifndef NDEBUG
errs() << "Unknown primitive type: " << *Ty << "\n";
@@ -575,7 +586,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
FunctionInnards << "void";
}
FunctionInnards << ')';
- printType(Out, FTy->getReturnType(),
+ printType(Out, FTy->getReturnType(),
/*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
return Out;
}
@@ -759,7 +770,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) {
}
/// Print out the casting for a cast operation. This does the double casting
-/// necessary for conversion to the destination type, if necessary.
+/// necessary for conversion to the destination type, if necessary.
/// @brief Print a cast
void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
// Print the destination type cast
@@ -782,7 +793,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
printSimpleType(Out, DstTy, false);
Out << ')';
break;
- case Instruction::SExt:
+ case Instruction::SExt:
case Instruction::FPToSI: // For these, make sure we get a signed dest
Out << '(';
printSimpleType(Out, DstTy, true);
@@ -803,7 +814,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
case Instruction::SIToFP:
case Instruction::SExt:
Out << '(';
- printSimpleType(Out, SrcTy, true);
+ printSimpleType(Out, SrcTy, true);
Out << ')';
break;
case Instruction::IntToPtr:
@@ -895,7 +906,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
case Instruction::AShr:
{
Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
printConstantWithCast(CE->getOperand(0), CE->getOpcode());
switch (CE->getOpcode()) {
case Instruction::Add:
@@ -905,10 +916,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
case Instruction::Mul:
case Instruction::FMul: Out << " * "; break;
case Instruction::URem:
- case Instruction::SRem:
+ case Instruction::SRem:
case Instruction::FRem: Out << " % "; break;
- case Instruction::UDiv:
- case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
case Instruction::FDiv: Out << " / "; break;
case Instruction::And: Out << " & "; break;
case Instruction::Or: Out << " | "; break;
@@ -920,7 +931,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
switch (CE->getPredicate()) {
case ICmpInst::ICMP_EQ: Out << " == "; break;
case ICmpInst::ICMP_NE: Out << " != "; break;
- case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: Out << " < "; break;
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULE: Out << " <= "; break;
@@ -940,8 +951,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
return;
}
case Instruction::FCmp: {
- Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
Out << "0";
else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
@@ -1006,18 +1017,18 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
else {
Out << "((";
printSimpleType(Out, Ty, false) << ')';
- if (CI->isMinValue(true))
+ if (CI->isMinValue(true))
Out << CI->getZExtValue() << 'u';
else
Out << CI->getSExtValue();
Out << ')';
}
return;
- }
+ }
switch (CPV->getType()->getTypeID()) {
case Type::FloatTyID:
- case Type::DoubleTyID:
+ case Type::DoubleTyID:
case Type::X86_FP80TyID:
case Type::PPC_FP128TyID:
case Type::FP128TyID: {
@@ -1027,8 +1038,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
// Because of FP precision problems we must load from a stack allocated
// value that holds the value in hex.
Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
- "float" :
- FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+ "float" :
+ FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
"double" :
"long double")
<< "*)&FPConstant" << I->second << ')';
@@ -1047,7 +1058,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
V = Tmp.convertToDouble();
}
-
+
if (IsNAN(V)) {
// The value is NaN
@@ -1211,10 +1222,10 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
case Instruction::LShr:
- case Instruction::URem:
+ case Instruction::URem:
case Instruction::UDiv: NeedsExplicitCast = true; break;
case Instruction::AShr:
- case Instruction::SRem:
+ case Instruction::SRem:
case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
case Instruction::SExt:
Ty = CE->getType();
@@ -1267,7 +1278,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
switch (Opcode) {
default:
// for most instructions, it doesn't matter
- break;
+ break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1294,7 +1305,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
Out << ")";
printConstant(CPV, false);
Out << ")";
- } else
+ } else
printConstant(CPV, false);
}
@@ -1312,16 +1323,16 @@ std::string CWriter::GetValueName(const Value *Operand) {
Mang->getNameWithPrefix(Str, GV, false);
return CBEMangle(Str.str().str());
}
-
+
std::string Name = Operand->getName();
-
+
if (Name.empty()) { // Assign unique names to local temporaries.
unsigned &No = AnonValueNumbers[Operand];
if (No == 0)
No = ++NextAnonValueNumber;
Name = "tmp__" + utostr(No);
}
-
+
std::string VarName;
VarName.reserve(Name.capacity());
@@ -1348,7 +1359,7 @@ void CWriter::writeInstComputationInline(Instruction &I) {
// Validate this.
const Type *Ty = I.getType();
if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
- Ty!=Type::getInt8Ty(I.getContext()) &&
+ Ty!=Type::getInt8Ty(I.getContext()) &&
Ty!=Type::getInt16Ty(I.getContext()) &&
Ty!=Type::getInt32Ty(I.getContext()) &&
Ty!=Type::getInt64Ty(I.getContext()))) {
@@ -1364,12 +1375,12 @@ void CWriter::writeInstComputationInline(Instruction &I) {
if (I.getType() == Type::getInt1Ty(I.getContext()) &&
!isa<ICmpInst>(I) && !isa<FCmpInst>(I))
NeedBoolTrunc = true;
-
+
if (NeedBoolTrunc)
Out << "((";
-
+
visit(I);
-
+
if (NeedBoolTrunc)
Out << ")&1)";
}
@@ -1404,9 +1415,9 @@ void CWriter::writeOperand(Value *Operand, bool Static) {
Out << ')';
}
-// Some instructions need to have their result value casted back to the
-// original types because their operands were casted to the expected type.
-// This function takes care of detecting that case and printing the cast
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
// for the Instruction.
bool CWriter::writeInstructionCast(const Instruction &I) {
const Type *Ty = I.getOperand(0)->getType();
@@ -1417,15 +1428,15 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
// We need to cast integer arithmetic so that it is always performed
// as unsigned, to avoid undefined behavior on overflow.
case Instruction::LShr:
- case Instruction::URem:
- case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::UDiv:
Out << "((";
printSimpleType(Out, Ty, false);
Out << ")(";
return true;
case Instruction::AShr:
- case Instruction::SRem:
- case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::SDiv:
Out << "((";
printSimpleType(Out, Ty, true);
Out << ")(";
@@ -1437,7 +1448,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
// Write the operand with a cast to another type based on the Opcode being used.
// This will be used in cases where an instruction has specific type
-// requirements (usually signedness) for its operands.
+// requirements (usually signedness) for its operands.
void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
// Extract the operand's type, we'll need it.
@@ -1455,7 +1466,7 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
switch (Opcode) {
default:
// for most instructions, it doesn't matter
- break;
+ break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1484,14 +1495,14 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
Out << ")";
writeOperand(Operand);
Out << ")";
- } else
+ } else
writeOperand(Operand);
}
-// Write the operand with a cast to another type based on the icmp predicate
-// being used.
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
- // This has to do a cast to ensure the operand has the right signedness.
+ // This has to do a cast to ensure the operand has the right signedness.
// Also, if the operand is a pointer, we make sure to cast to an integer when
// doing the comparison both for signedness and so that the C compiler doesn't
// optimize things like "p < NULL" to false (p may contain an integer value
@@ -1504,7 +1515,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
writeOperand(Operand);
return;
}
-
+
// Should this be a signed comparison? If so, convert to signed.
bool castIsSigned = Cmp.isSigned();
@@ -1512,7 +1523,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
const Type* OpTy = Operand->getType();
if (OpTy->isPointerTy())
OpTy = TD->getIntPtrType(Operand->getContext());
-
+
Out << "((";
printSimpleType(Out, OpTy, castIsSigned);
Out << ")";
@@ -1579,7 +1590,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
Out << "#if defined(__GNUC__)\n"
<< "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
<< "#endif\n\n";
-
+
// Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
// From the GCC documentation:
//
@@ -1635,7 +1646,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
<< "#define __ATTRIBUTE_DTOR__\n"
<< "#define LLVM_ASM(X)\n"
<< "#endif\n\n";
-
+
Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
<< "#define __builtin_stack_save() 0 /* not implemented */\n"
<< "#define __builtin_stack_restore(X) /* noop */\n"
@@ -1658,11 +1669,11 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
if (!InitList) return;
-
+
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
+
if (CS->getOperand(1)->isNullValue())
return; // Found a null terminator, exit printing.
Constant *FP = CS->getOperand(1);
@@ -1690,12 +1701,12 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
else if (GV->getName() == "llvm.global_dtors")
return GlobalDtors;
}
-
+
// Otherwise, if it is other metadata, don't print it. This catches things
// like debug information.
if (GV->getSection() == "llvm.metadata")
return NotPrinted;
-
+
return NotSpecial;
}
@@ -1726,7 +1737,7 @@ static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
bool CWriter::doInitialization(Module &M) {
FunctionPass::doInitialization(M);
-
+
// Initialize
TheModule = &M;
@@ -1738,13 +1749,13 @@ bool CWriter::doInitialization(Module &M) {
std::string Triple = TheModule->getTargetTriple();
if (Triple.empty())
Triple = llvm::sys::getHostTriple();
-
+
std::string E;
if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
TAsm = Match->createAsmInfo(Triple);
-#endif
+#endif
TAsm = new CBEMCAsmInfo();
- TCtx = new MCContext(*TAsm);
+ TCtx = new MCContext(*TAsm, NULL);
Mang = new Mangler(*TCtx, *TD);
// Keep track of which functions are static ctors/dtors so they can have
@@ -1762,7 +1773,7 @@ bool CWriter::doInitialization(Module &M) {
break;
}
}
-
+
// get declaration for alloca
Out << "/* Provide Declarations */\n";
Out << "#include <stdarg.h>\n"; // Varargs support
@@ -1819,7 +1830,7 @@ bool CWriter::doInitialization(Module &M) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
- if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
I->hasCommonLinkage())
Out << "extern ";
else if (I->hasDLLImportLinkage())
@@ -1844,7 +1855,7 @@ bool CWriter::doInitialization(Module &M) {
Out << "double fmod(double, double);\n"; // Support for FP rem
Out << "float fmodf(float, float);\n";
Out << "long double fmodl(long double, long double);\n";
-
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
// Don't print declarations for intrinsic functions.
if (!I->isIntrinsic() && I->getName() != "setjmp" &&
@@ -1852,7 +1863,7 @@ bool CWriter::doInitialization(Module &M) {
if (I->hasExternalWeakLinkage())
Out << "extern ";
printFunctionSignature(I, true);
- if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
Out << " __ATTRIBUTE_WEAK__";
if (I->hasExternalWeakLinkage())
Out << " __EXTERNAL_WEAK__";
@@ -1862,10 +1873,10 @@ bool CWriter::doInitialization(Module &M) {
Out << " __ATTRIBUTE_DTOR__";
if (I->hasHiddenVisibility())
Out << " __HIDDEN__";
-
+
if (I->hasName() && I->getName()[0] == 1)
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-
+
Out << ";\n";
}
}
@@ -1889,7 +1900,7 @@ bool CWriter::doInitialization(Module &M) {
if (I->isThreadLocal())
Out << "__thread ";
- printType(Out, I->getType()->getElementType(), false,
+ printType(Out, I->getType()->getElementType(), false,
GetValueName(I));
if (I->hasLinkOnceLinkage())
@@ -1909,7 +1920,7 @@ bool CWriter::doInitialization(Module &M) {
// Output the global variable definitions and contents...
if (!M.global_empty()) {
Out << "\n\n/* Global Variable Definitions and Initialization */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
if (!I->isDeclaration()) {
// Ignore special globals, such as debug info.
@@ -1927,7 +1938,7 @@ bool CWriter::doInitialization(Module &M) {
if (I->isThreadLocal())
Out << "__thread ";
- printType(Out, I->getType()->getElementType(), false,
+ printType(Out, I->getType()->getElementType(), false,
GetValueName(I));
if (I->hasLinkOnceLinkage())
Out << " __attribute__((common))";
@@ -1938,7 +1949,7 @@ bool CWriter::doInitialization(Module &M) {
if (I->hasHiddenVisibility())
Out << " __HIDDEN__";
-
+
// If the initializer is not null, emit the initializer. If it is null,
// we try to avoid emitting large amounts of zeros. The problem with
// this, however, occurs when the variable has weak linkage. In this
@@ -1972,7 +1983,7 @@ bool CWriter::doInitialization(Module &M) {
if (!M.empty())
Out << "\n\n/* Function Bodies */\n";
- // Emit some helper functions for dealing with FCMP instruction's
+ // Emit some helper functions for dealing with FCMP instruction's
// predicates
Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
Out << "return X == X && Y == Y; }\n";
@@ -2027,7 +2038,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
printFloatingPointConstants(CE->getOperand(i));
return;
}
-
+
// Otherwise, check for a FP constant that we need to print.
const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
if (FPC == 0 ||
@@ -2038,7 +2049,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
return;
FPConstantMap[FPC] = FPCounter; // Number the FP constants
-
+
if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
double Val = FPC->getValueAPF().convertToDouble();
uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
@@ -2057,7 +2068,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
APInt api = FPC->getValueAPF().bitcastToAPInt();
const uint64_t *p = api.getRawData();
Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
- << " = { 0x" << utohexstr(p[0])
+ << " = { 0x" << utohexstr(p[0])
<< "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
<< "}; /* Long double constant */\n";
} else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
@@ -2068,7 +2079,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
<< " = { 0x"
<< utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
<< "}; /* Long double constant */\n";
-
+
} else {
llvm_unreachable("Unknown float type!");
}
@@ -2140,12 +2151,12 @@ void CWriter::printContainedStructs(const Type *Ty,
// Don't walk through pointers.
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
-
+
// Print all contained types first.
for (Type::subtype_iterator I = Ty->subtype_begin(),
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
-
+
if (Ty->isStructTy() || Ty->isArrayTy()) {
// Check to see if we have already printed this struct.
if (StructPrinted.insert(Ty).second) {
@@ -2160,10 +2171,10 @@ void CWriter::printContainedStructs(const Type *Ty,
void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
/// isStructReturn - Should this function actually return a struct by-value?
bool isStructReturn = F->hasStructRetAttr();
-
+
if (F->hasLocalLinkage()) Out << "static ";
if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
- if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
switch (F->getCallingConv()) {
case CallingConv::X86_StdCall:
Out << "__attribute__((stdcall)) ";
@@ -2177,7 +2188,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
default:
break;
}
-
+
// Loop over the arguments, printing them...
const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
const AttrListPtr &PAL = F->getAttributes();
@@ -2193,7 +2204,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
if (!F->arg_empty()) {
Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
unsigned Idx = 1;
-
+
// If this is a struct-return function, don't print the hidden
// struct-return argument.
if (isStructReturn) {
@@ -2201,7 +2212,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
++I;
++Idx;
}
-
+
std::string ArgName;
for (; I != E; ++I) {
if (PrintedArg) FunctionInnards << ", ";
@@ -2225,7 +2236,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
// Loop over the arguments, printing them.
FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
unsigned Idx = 1;
-
+
// If this is a struct-return function, don't print the hidden
// struct-return argument.
if (isStructReturn) {
@@ -2233,7 +2244,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
++I;
++Idx;
}
-
+
for (; I != E; ++I) {
if (PrintedArg) FunctionInnards << ", ";
const Type *ArgTy = *I;
@@ -2262,7 +2273,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
FunctionInnards << "void"; // ret() -> ret(void) in C.
}
FunctionInnards << ')';
-
+
// Get the return type for the function.
const Type *RetTy;
if (!isStructReturn)
@@ -2271,9 +2282,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
// If this is a struct-return function, print the struct-return type.
RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
}
-
+
// Print out the return type and the signature built above.
- printType(Out, RetTy,
+ printType(Out, RetTy,
/*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
FunctionInnards.str());
}
@@ -2293,7 +2304,7 @@ void CWriter::printFunction(Function &F) {
printFunctionSignature(&F, false);
Out << " {\n";
-
+
// If this is a struct return function, handle the result with magic.
if (isStructReturn) {
const Type *StructTy =
@@ -2303,13 +2314,13 @@ void CWriter::printFunction(Function &F) {
Out << "; /* Struct return temporary */\n";
Out << " ";
- printType(Out, F.arg_begin()->getType(), false,
+ printType(Out, F.arg_begin()->getType(), false,
GetValueName(F.arg_begin()));
Out << " = &StructReturn;\n";
}
bool PrintedVar = false;
-
+
// print local variable information for the function
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
if (const AllocaInst *AI = isDirectAlloca(&*I)) {
@@ -2317,7 +2328,7 @@ void CWriter::printFunction(Function &F) {
printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
Out << "; /* Address-exposed local */\n";
PrintedVar = true;
- } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
+ } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
!isInlinableInst(*I)) {
Out << " ";
printType(Out, I->getType(), false, GetValueName(&*I));
@@ -2333,7 +2344,7 @@ void CWriter::printFunction(Function &F) {
}
// We need a temporary for the BitCast to use so it can pluck a value out
// of a union to do the BitCast. This is separate from the need for a
- // variable to hold the result of the BitCast.
+ // variable to hold the result of the BitCast.
if (isFPIntBitCast(*I)) {
Out << " llvmBitCastUnion " << GetValueName(&*I)
<< "__BITCAST_TEMPORARY;\n";
@@ -2421,7 +2432,7 @@ void CWriter::visitReturnInst(ReturnInst &I) {
Out << " return StructReturn;\n";
return;
}
-
+
// Don't output a void return if this is the last basic block in the function
if (I.getNumOperands() == 0 &&
&*--I.getParent()->getParent()->end() == I.getParent() &&
@@ -2578,7 +2589,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
// We must cast the results of binary operations which might be promoted.
bool needsCast = false;
if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
- (I.getType() == Type::getInt16Ty(I.getContext()))
+ (I.getType() == Type::getInt16Ty(I.getContext()))
|| (I.getType() == Type::getFloatTy(I.getContext()))) {
needsCast = true;
Out << "((";
@@ -2630,7 +2641,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
case Instruction::SRem:
case Instruction::FRem: Out << " % "; break;
case Instruction::UDiv:
- case Instruction::SDiv:
+ case Instruction::SDiv:
case Instruction::FDiv: Out << " / "; break;
case Instruction::And: Out << " & "; break;
case Instruction::Or: Out << " | "; break;
@@ -2638,7 +2649,7 @@ void CWriter::visitBinaryOperator(Instruction &I) {
case Instruction::Shl : Out << " << "; break;
case Instruction::LShr:
case Instruction::AShr: Out << " >> "; break;
- default:
+ default:
#ifndef NDEBUG
errs() << "Invalid operator type!" << I;
#endif
@@ -2681,7 +2692,7 @@ void CWriter::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_SGT: Out << " > "; break;
default:
#ifndef NDEBUG
- errs() << "Invalid icmp predicate!" << I;
+ errs() << "Invalid icmp predicate!" << I;
#endif
llvm_unreachable(0);
}
@@ -2754,7 +2765,7 @@ void CWriter::visitCastInst(CastInst &I) {
if (isFPIntBitCast(I)) {
Out << '(';
// These int<->float and long<->double casts need to be handled specially
- Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
<< getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
writeOperand(I.getOperand(0));
Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
@@ -2762,7 +2773,7 @@ void CWriter::visitCastInst(CastInst &I) {
Out << ')';
return;
}
-
+
Out << '(';
printCast(I.getOpcode(), SrcTy, DstTy);
@@ -2770,15 +2781,15 @@ void CWriter::visitCastInst(CastInst &I) {
if (SrcTy == Type::getInt1Ty(I.getContext()) &&
I.getOpcode() == Instruction::SExt)
Out << "0-";
-
+
writeOperand(I.getOperand(0));
-
- if (DstTy == Type::getInt1Ty(I.getContext()) &&
+
+ if (DstTy == Type::getInt1Ty(I.getContext()) &&
(I.getOpcode() == Instruction::Trunc ||
I.getOpcode() == Instruction::FPToUI ||
I.getOpcode() == Instruction::FPToSI ||
I.getOpcode() == Instruction::PtrToInt)) {
- // Make sure we really get a trunc to bool by anding the operand with 1
+ // Make sure we really get a trunc to bool by anding the operand with 1
Out << "&1u";
}
Out << ')';
@@ -2835,7 +2846,7 @@ void CWriter::lowerIntrinsics(Function &F) {
#undef GET_GCC_BUILTIN_NAME
// If we handle it, don't lower it.
if (BuiltinName[0]) break;
-
+
// All other intrinsic calls we must lower.
Instruction *Before = 0;
if (CI != &BB->front())
@@ -2858,7 +2869,7 @@ void CWriter::lowerIntrinsics(Function &F) {
break;
}
- // We may have collected some prototypes to emit in the loop above.
+ // We may have collected some prototypes to emit in the loop above.
// Emit them now, before the function that uses them is emitted. But,
// be careful not to emit them twice.
std::vector<Function*>::iterator I = prototypesToGen.begin();
@@ -2898,9 +2909,9 @@ void CWriter::visitCallInst(CallInst &I) {
writeOperandDeref(I.getArgOperand(0));
Out << " = ";
}
-
+
if (I.isTailCall()) Out << " /*tail*/ ";
-
+
if (!WroteCallee) {
// If this is an indirect call to a struct return function, we need to cast
// the pointer. Ditto for indirect calls with byval arguments.
@@ -2924,7 +2935,7 @@ void CWriter::visitCallInst(CallInst &I) {
NeedsCast = true;
Callee = RF;
}
-
+
if (NeedsCast) {
// Ok, just cast the pointer type.
Out << "((";
@@ -2957,14 +2968,14 @@ void CWriter::visitCallInst(CallInst &I) {
++AI;
++ArgNo;
}
-
+
for (; AI != AE; ++AI, ++ArgNo) {
if (PrintedArg) Out << ", ";
if (ArgNo < NumDeclaredParams &&
(*AI)->getType() != FTy->getParamType(ArgNo)) {
Out << '(';
- printType(Out, FTy->getParamType(ArgNo),
+ printType(Out, FTy->getParamType(ArgNo),
/*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
Out << ')';
}
@@ -2993,7 +3004,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
#include "llvm/Intrinsics.gen"
#undef GET_GCC_BUILTIN_NAME
assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
-
+
Out << BuiltinName;
WroteCallee = true;
return false;
@@ -3003,7 +3014,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
return true;
case Intrinsic::vastart:
Out << "0; ";
-
+
Out << "va_start(*(va_list*)";
writeOperand(I.getArgOperand(0));
Out << ", ";
@@ -3081,7 +3092,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
case Intrinsic::x86_sse2_cmp_pd:
Out << '(';
printType(Out, I.getType());
- Out << ')';
+ Out << ')';
// Multiple GCC builtins multiplex onto this intrinsic.
switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
@@ -3102,7 +3113,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << 's';
else
Out << 'd';
-
+
Out << "(";
writeOperand(I.getArgOperand(0));
Out << ", ";
@@ -3112,7 +3123,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
case Intrinsic::ppc_altivec_lvsl:
Out << '(';
printType(Out, I.getType());
- Out << ')';
+ Out << ')';
Out << "__builtin_altivec_lvsl(0, (void*)";
writeOperand(I.getArgOperand(0));
Out << ")";
@@ -3132,13 +3143,13 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
std::string Triple = TheModule->getTargetTriple();
if (Triple.empty())
Triple = llvm::sys::getHostTriple();
-
+
std::string E;
if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
TargetAsm = Match->createAsmInfo(Triple);
else
return c.Codes[0];
-
+
const char *const *table = TargetAsm->getAsmCBE();
// Search the translation table if it exists.
@@ -3164,7 +3175,7 @@ static std::string gccifyAsm(std::string asmstr) {
if (asmstr[i + 1] == '{') {
std::string::size_type a = asmstr.find_first_of(':', i + 1);
std::string::size_type b = asmstr.find_first_of('}', i + 1);
- std::string n = "%" +
+ std::string n = "%" +
asmstr.substr(a + 1, b - a - 1) +
asmstr.substr(i + 2, a - i - 2);
asmstr.replace(i, b - i + 1, n);
@@ -3174,7 +3185,7 @@ static std::string gccifyAsm(std::string asmstr) {
}
else if (asmstr[i] == '%')//grr
{ asmstr.replace(i, 1, "%%"); ++i;}
-
+
return asmstr;
}
@@ -3182,8 +3193,8 @@ static std::string gccifyAsm(std::string asmstr) {
// handle commutativity
void CWriter::visitInlineAsm(CallInst &CI) {
InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
- std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
-
+ InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
+
std::vector<std::pair<Value*, int> > ResultVals;
if (CI.getType() == Type::getVoidTy(CI.getContext()))
;
@@ -3193,27 +3204,27 @@ void CWriter::visitInlineAsm(CallInst &CI) {
} else {
ResultVals.push_back(std::make_pair(&CI, -1));
}
-
+
// Fix up the asm string for gcc and emit it.
Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
Out << " :";
unsigned ValueCount = 0;
bool IsFirst = true;
-
+
// Convert over all the output constraints.
- for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
E = Constraints.end(); I != E; ++I) {
-
+
if (I->Type != InlineAsm::isOutput) {
++ValueCount;
continue; // Ignore non-output constraints.
}
-
+
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
std::string C = InterpretASMConstraint(*I);
if (C.empty()) continue;
-
+
if (!IsFirst) {
Out << ", ";
IsFirst = false;
@@ -3222,7 +3233,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
// Unpack the dest.
Value *DestVal;
int DestValNo = -1;
-
+
if (ValueCount < ResultVals.size()) {
DestVal = ResultVals[ValueCount].first;
DestValNo = ResultVals[ValueCount].second;
@@ -3231,38 +3242,38 @@ void CWriter::visitInlineAsm(CallInst &CI) {
if (I->isEarlyClobber)
C = "&"+C;
-
+
Out << "\"=" << C << "\"(" << GetValueName(DestVal);
if (DestValNo != -1)
Out << ".field" << DestValNo; // Multiple retvals.
Out << ")";
++ValueCount;
}
-
-
+
+
// Convert over all the input constraints.
Out << "\n :";
IsFirst = true;
ValueCount = 0;
- for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
E = Constraints.end(); I != E; ++I) {
if (I->Type != InlineAsm::isInput) {
++ValueCount;
continue; // Ignore non-input constraints.
}
-
+
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
std::string C = InterpretASMConstraint(*I);
if (C.empty()) continue;
-
+
if (!IsFirst) {
Out << ", ";
IsFirst = false;
}
-
+
assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
+
Out << "\"" << C << "\"(";
if (!I->isIndirect)
writeOperand(SrcVal);
@@ -3270,10 +3281,10 @@ void CWriter::visitInlineAsm(CallInst &CI) {
writeOperandDeref(SrcVal);
Out << ")";
}
-
+
// Convert over the clobber constraints.
IsFirst = true;
- for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
E = Constraints.end(); I != E; ++I) {
if (I->Type != InlineAsm::isClobber)
continue; // Ignore non-input constraints.
@@ -3281,15 +3292,15 @@ void CWriter::visitInlineAsm(CallInst &CI) {
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
std::string C = InterpretASMConstraint(*I);
if (C.empty()) continue;
-
+
if (!IsFirst) {
Out << ", ";
IsFirst = false;
}
-
+
Out << '\"' << C << '"';
}
-
+
Out << ")";
}
@@ -3308,13 +3319,13 @@ void CWriter::visitAllocaInst(AllocaInst &I) {
void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
gep_type_iterator E, bool Static) {
-
+
// If there are no indices, just print out the pointer.
if (I == E) {
writeOperand(Ptr);
return;
}
-
+
// Find out if the last index is into a vector. If so, we have to print this
// specially. Since vectors can't have elements of indexable type, only the
// last index could possibly be of a vector element.
@@ -3323,9 +3334,9 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
}
-
+
Out << "(";
-
+
// If the last index is into a vector, we can't print it as &a[i][j] because
// we can't index into a vector with j in GCC. Instead, emit this as
// (((float*)&a[i])+j)
@@ -3334,7 +3345,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
Out << ")(";
}
-
+
Out << '&';
// If the first index is 0 (very typical) we can do a number of
@@ -3444,7 +3455,7 @@ void CWriter::visitStoreInst(StoreInst &I) {
if (BitMask) {
Out << ") & ";
printConstant(BitMask, false);
- Out << ")";
+ Out << ")";
}
}
@@ -3477,7 +3488,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) {
void CWriter::visitExtractElementInst(ExtractElementInst &I) {
// We know that our operand is not inlined.
Out << "((";
- const Type *EltTy =
+ const Type *EltTy =
cast<VectorType>(I.getOperand(0)->getType())->getElementType();
printType(Out, PointerType::getUnqual(EltTy));
Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h
index 1f21511..72f8430 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPU.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.h
@@ -23,6 +23,7 @@ namespace llvm {
class formatted_raw_ostream;
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
extern Target TheCellSPUTarget;
}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
index 069a182..5ef5716 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -54,8 +54,8 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison:
def CEQr64compare:
- CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB))), 0xb)>;
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
// The i64 seteq fragment that does the vector comparison
def CEQv2i64compare:
@@ -67,12 +67,14 @@ def CEQv2i64compare:
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
- def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
}
defm I64EQ: CompareEqual64;
@@ -89,10 +91,12 @@ def : I64SELECTNegCond<setne, I64EQr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CLGTr64ugt:
- CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CLGTr64eq:
- CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CLGTr64compare:
CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
@@ -112,12 +116,14 @@ def CLGTv2i64compare:
multiclass CompareLogicalGreaterThan64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
}
defm I64LGT: CompareLogicalGreaterThan64;
@@ -144,12 +150,14 @@ def CLGEv2i64compare:
multiclass CompareLogicalGreaterEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
}
defm I64LGE: CompareLogicalGreaterEqual64;
@@ -168,10 +176,12 @@ def : I64SELECTNegCond<setult, I64LGEr64>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def CGTr64sgt:
- CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CGTr64eq:
- CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
def CGTr64compare:
CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
@@ -191,12 +201,14 @@ def CGTv2i64compare:
multiclass CompareGreaterThan64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
}
defm I64GT: CompareLogicalGreaterThan64;
@@ -223,12 +235,12 @@ def CGEv2i64compare:
multiclass CompareGreaterEqual64 {
// Plain old comparison, converts back to i32 scalar
- def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
// SELB mask from FSM:
- def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
- def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
}
defm I64GE: CompareGreaterEqual64;
@@ -255,9 +267,9 @@ class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
@@ -275,11 +287,12 @@ class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB)>.Fragment,
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
@@ -374,9 +387,9 @@ class v2i64_mul<dag rA, dag rB, dag rCGmask>:
rCGmask>;
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
- (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
- (ORv2i64_i64 R64C:$rB),
- (v4i32 VECREG:$rCGmask)>.Fragment)>;
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
(v4i32 VECREG:$rCGmask)),
diff --git a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 3e95531..4040461 100644
--- a/contrib/llvm/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -46,10 +46,6 @@ namespace {
return "STI CBEA SPU Assembly Printer";
}
- SPUTargetMachine &getTM() {
- return static_cast<SPUTargetMachine&>(TM);
- }
-
/// printInstruction - This method is automatically generated by tablegen
/// from the instruction set description.
void printInstruction(const MachineInstr *MI, raw_ostream &OS);
@@ -64,15 +60,6 @@ namespace {
}
void printOp(const MachineOperand &MO, raw_ostream &OS);
- /// printRegister - Print register according to target requirements.
- ///
- void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
- unsigned RegNo = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
- "Not physreg??");
- O << getRegisterName(RegNo);
- }
-
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
@@ -93,17 +80,6 @@ namespace {
void
- printS7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- int value = MI->getOperand(OpNo).getImm();
- value = (value << (32 - 7)) >> (32 - 7);
-
- assert((value >= -(1 << 8) && value <= (1 << 7) - 1)
- && "Invalid s7 argument");
- O << value;
- }
-
- void
printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
{
unsigned int value = MI->getOperand(OpNo).getImm();
@@ -134,12 +110,6 @@ namespace {
}
void
- printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
- {
- O << (unsigned)MI->getOperand(OpNo).getImm();
- }
-
- void
printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
// the value contained in the register. For this reason, the darwin
@@ -221,13 +191,6 @@ namespace {
printOp(MI->getOperand(OpNo), O);
}
- void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // HBR operands are generated in front of branches, hence, the
- // program counter plus the target.
- O << ".+";
- printOp(MI->getOperand(OpNo), O);
- }
-
void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
if (MI->getOperand(OpNo).isImm()) {
printS16ImmOperand(MI, OpNo, O);
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.cpp
deleted file mode 100644
index 60d7ba7..0000000
--- a/contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the Cell SPU target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPU.h"
-#include "SPUFrameInfo.h"
-#include "SPURegisterNames.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// SPUFrameInfo:
-//===----------------------------------------------------------------------===//
-
-SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
- TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
- TM(tm)
-{
- LR[0].first = SPU::R0;
- LR[0].second = 16;
-}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 0000000..432f4a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,276 @@
+//===-- SPUFrameLowering.cpp - Cell SPU Frame Lowering --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame lowering implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameLowering.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+  assert((FrameSize & 0xf) == 0
+         && "SPUFrameLowering::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+      // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ } else {
+    // This is a leaf function -- insert a branch hint if there is a
+    // sufficient number of instructions in the basic block. Note that
+    // this is just a best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
+ .addSym(MMI.getContext().CreateTempSymbol());
+ }
+ }
+}
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
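
A minimal standalone sketch (not part of the patch) of the rounding performed by determineFrameLayout() above: the frame size and the maximum call-frame size are rounded up to the stack alignment with the usual power-of-two mask trick. The helper name alignTo below is illustrative only.

#include <cassert>
#include <cstdio>

static unsigned alignTo(unsigned Size, unsigned Align) {
  assert((Align & (Align - 1)) == 0 && "Alignment is not a power of 2");
  unsigned AlignMask = Align - 1;
  // Round Size up to the next multiple of Align.
  return (Size + AlignMask) & ~AlignMask;
}

int main() {
  // With the SPU's 16-byte stack alignment, a 52-byte frame rounds up to 64.
  printf("%u\n", alignTo(52, 16)); // prints 64
  printf("%u\n", alignTo(64, 16)); // already aligned, prints 64
  return 0;
}

The same AlignMask also drives the maxCallFrameSize rounding when the function has variable-sized allocas, so that dynamic allocations stay aligned.
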
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
index f511acd..4fee72d 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUFrameInfo.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
+//===-- SPUFrameLowering.h - Frame lowering for the Cell SPU ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,19 +12,39 @@
//
//===----------------------------------------------------------------------===//
-#if !defined(SPUFRAMEINFO_H)
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class SPUFrameInfo: public TargetFrameInfo {
- const TargetMachine &TM;
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
std::pair<unsigned, int> LR[1];
public:
- SPUFrameInfo(const TargetMachine &tm);
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+    //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+    //! Predicate: Target has a dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//! Return a function's saved spill slots
/*!
@@ -71,5 +91,4 @@ namespace llvm {
};
}
-#define SPUFRAMEINFO_H 1
#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index 9dbab1d..403d7ef 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
///
/// \return NoHazard
ScheduleHazardRecognizer::HazardType
-SPUHazardRecognizer::getHazardType(SUnit *SU)
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
{
// Initial thoughts on how to do this, but this code cannot work unless the
// function's prolog and epilog code are also being scheduled so that we can
// accurately determine which pipeline is being scheduled.
#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
ScheduleHazardRecognizer::HazardType retval = NoHazard;
bool mustBeOdd = false;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
index d0ae2d8..675632c 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -20,7 +20,7 @@
namespace llvm {
class TargetInstrInfo;
-
+
/// SPUHazardRecognizer
class SPUHazardRecognizer : public ScheduleHazardRecognizer
{
@@ -30,7 +30,7 @@ private:
public:
SPUHazardRecognizer(const TargetInstrInfo &TII);
- virtual HazardType getHazardType(SUnit *SU);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
virtual void EmitNoop();
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 2f15984..d226156 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -15,7 +15,7 @@
#include "SPU.h"
#include "SPUTargetMachine.h"
#include "SPUHazardRecognizers.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "SPURegisterNames.h"
#include "SPUTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -111,55 +111,6 @@ namespace {
return false;
}
- //===------------------------------------------------------------------===//
- //! EVT to "useful stuff" mapping structure:
-
- struct valtype_map_s {
- EVT VT;
- unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
- bool ldresult_imm; /// LDRESULT instruction requires immediate?
- unsigned lrinst; /// LR instruction
- };
-
- const valtype_map_s valtype_map[] = {
- { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
- { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
- { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
- { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
- { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
- { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
- // vector types... (sigh!)
- { MVT::v16i8, 0, false, SPU::LRv16i8 },
- { MVT::v8i16, 0, false, SPU::LRv8i16 },
- { MVT::v4i32, 0, false, SPU::LRv4i32 },
- { MVT::v2i64, 0, false, SPU::LRv2i64 },
- { MVT::v4f32, 0, false, SPU::LRv4f32 },
- { MVT::v2f64, 0, false, SPU::LRv2f64 }
- };
-
- const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
- const valtype_map_s *getValueTypeMapEntry(EVT VT)
- {
- const valtype_map_s *retval = 0;
- for (size_t i = 0; i < n_valtype_map; ++i) {
- if (valtype_map[i].VT == VT) {
- retval = valtype_map + i;
- break;
- }
- }
-
-
-#ifndef NDEBUG
- if (retval == 0) {
- report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns"
- "NULL for " + Twine(VT.getEVTString()));
- }
-#endif
-
- return retval;
- }
-
//! Generate the carry-generate shuffle mask.
SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
SmallVector<SDValue, 16 > ShufBytes;
@@ -221,16 +172,10 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
- /// getI64Imm - Return a target constant with the specified value, of type
- /// i64.
- inline SDValue getI64Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
- }
-
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
+ }
SDNode *emitBuildVector(SDNode *bvNode) {
EVT vecVT = bvNode->getValueType(0);
@@ -268,10 +213,10 @@ namespace {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
-
+
HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(),0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment));
CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
@@ -356,13 +301,8 @@ namespace {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
- /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
- /// this target when scheduling the DAG.
- virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
- const TargetInstrInfo *II = TM.getInstrInfo();
- assert(II && "No InstrInfo?");
- return new SPUHazardRecognizer(*II);
- }
+ private:
+ SDValue getRC( MVT );
// Include the pieces autogenerated from the target description.
#include "SPUGenDAGISel.inc"
@@ -450,8 +390,8 @@ bool
SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Index) {
return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameInfo::minFrameOffset(),
- SPUFrameInfo::maxFrameOffset());
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
}
bool
@@ -467,7 +407,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
int FI = int(FIN->getIndex());
DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
<< FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(0, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -493,7 +433,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -514,7 +454,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
<< " frame index = " << FI << "\n");
- if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
Base = CurDAG->getTargetConstant(offset, PtrTy);
Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
return true;
@@ -564,8 +504,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Base = CurDAG->getTargetConstant(0, N.getValueType());
Index = N;
return true;
- } else if (Opc == ISD::Register
- ||Opc == ISD::CopyFromReg
+ } else if (Opc == ISD::Register
+ ||Opc == ISD::CopyFromReg
||Opc == ISD::UNDEF
||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
@@ -625,6 +565,46 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
return false;
}
+/*!
+ Utility function to use with COPY_TO_REGCLASS instructions. Returns an SDValue
+ to be used as the last parameter of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ break;
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ break;
+ default:
+ assert( false && "add a new case here" );
+ }
+ return SDValue();
+}
+
//! Convert the operand from a target-independent to a target-specific node
/*!
*/
@@ -632,7 +612,7 @@ SDNode *
SPUDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = N->getOpcode();
int n_ops = -1;
- unsigned NewOpc;
+ unsigned NewOpc = 0;
EVT OpVT = N->getValueType(0);
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
@@ -654,7 +634,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
NewOpc = SPU::Ar32;
Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
- N->getValueType(0), TFI, Imm0),
+ N->getValueType(0), TFI),
0);
n_ops = 2;
}
@@ -669,7 +649,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
EVT Op0VT = Op0.getValueType();
EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
Op0VT, (128 / Op0VT.getSizeInBits()));
- EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue shufMask;
@@ -703,19 +683,19 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
-
+
HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
Op0VecVT, Op0));
-
+
SDValue PromScalar;
if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
PromScalar = SDValue(N, 0);
else
PromScalar = PromoteScalar.getValue();
-
+
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- PromScalar, PromScalar,
+ PromScalar, PromScalar,
SDValue(shufMaskLoad, 0));
HandleSDNode Dummy2(zextShuffle);
@@ -725,7 +705,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
zextShuffle = Dummy2.getValue();
HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
zextShuffle));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
SelectCode(Dummy.getValue().getNode());
return Dummy.getValue().getNode();
@@ -736,7 +716,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
N->getOperand(0), N->getOperand(1),
SDValue(CGLoad, 0)));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -748,7 +728,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
N->getOperand(0), N->getOperand(1),
SDValue(CGLoad, 0)));
-
+
CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -779,8 +759,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
if (shift_amt >= 32) {
SDNode *hi32 =
- CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT,
- Op0.getOperand(0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
shift_amt -= 32;
if (shift_amt > 0) {
@@ -862,23 +842,12 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDValue Arg = N->getOperand(0);
SDValue Chain = N->getOperand(1);
SDNode *Result;
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-
- if (vtm->ldresult_ins == 0) {
- report_fatal_error("LDRESULT for unsupported type: " +
- Twine(VT.getEVTString()));
- }
-
- Opc = vtm->ldresult_ins;
- if (vtm->ldresult_imm) {
- SDValue Zero = CurDAG->getTargetConstant(0, VT);
-
- Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
- }
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
return Result;
+
} else if (Opc == SPUISD::IndirectAddr) {
// Look at the operands: SelectCode() will catch the cases that aren't
// specifically handled here.
@@ -904,10 +873,10 @@ SPUDAGToDAGISel::Select(SDNode *N) {
NewOpc = SPU::AIr32;
Ops[1] = Op1;
} else {
- Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
- N->getValueType(0),
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
Op1),
- 0);
+ 0);
}
}
Ops[0] = Op0;
@@ -939,7 +908,7 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDNode *
SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue Op0 = N->getOperand(0);
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
@@ -947,7 +916,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue SelMaskVal;
DebugLoc dl = N->getDebugLoc();
- VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
@@ -991,7 +961,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1012,7 +983,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDNode *VecOp0, *Shift = 0;
DebugLoc dl = N->getDebugLoc();
- VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
@@ -1058,7 +1030,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(Bits, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1072,21 +1045,23 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
SDNode *
SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
// Promote Op0 to vector
- EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
OpVT, (128 / OpVT.getSizeInBits()));
SDValue ShiftAmt = N->getOperand(1);
EVT ShiftAmtVT = ShiftAmt.getValueType();
DebugLoc dl = N->getDebugLoc();
SDNode *VecOp0 =
- CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
SDNode *SignRot =
CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
SDValue(VecOp0, 0), SignRotAmt);
SDNode *UpperHalfSign =
- CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
SDNode *UpperHalfSignMask =
CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
@@ -1133,7 +1108,8 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
SDValue(Shift, 0), SDValue(NegShift, 0));
}
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
}
/*!
@@ -1154,20 +1130,21 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
// Here's where it gets interesting, because we have to parse out the
// subtree handed back in i64vec:
- if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+ if (i64vec.getOpcode() == ISD::BITCAST) {
// The degenerate case where the upper and lower bits in the splat are
// identical:
SDValue Op0 = i64vec.getOperand(0);
ReplaceUses(i64vec, Op0);
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(Op0.getNode()), 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
} else if (i64vec.getOpcode() == SPUISD::SHUFB) {
SDValue lhs = i64vec.getOperand(0);
SDValue rhs = i64vec.getOperand(1);
SDValue shufmask = i64vec.getOperand(2);
- if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+ if (lhs.getOpcode() == ISD::BITCAST) {
ReplaceUses(lhs, lhs.getOperand(0));
lhs = lhs.getOperand(0);
}
@@ -1176,7 +1153,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? lhs.getNode()
: emitBuildVector(lhs.getNode()));
- if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+ if (rhs.getOpcode() == ISD::BITCAST) {
ReplaceUses(rhs, rhs.getOperand(0));
rhs = rhs.getOperand(0);
}
@@ -1185,7 +1162,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? rhs.getNode()
: emitBuildVector(rhs.getNode()));
- if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+ if (shufmask.getOpcode() == ISD::BITCAST) {
ReplaceUses(shufmask, shufmask.getOperand(0));
shufmask = shufmask.getOperand(0);
}
@@ -1201,11 +1178,13 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
HandleSDNode Dummy(shufNode);
SDNode *SN = SelectCode(Dummy.getValue().getNode());
if (SN == 0) SN = Dummy.getValue().getNode();
-
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(emitBuildVector(i64vec.getNode()), 0));
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
} else {
report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
"condition");
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
index 46f3189..e6511d0 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1,4 +1,3 @@
-//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +13,13 @@
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,41 +41,12 @@ using namespace llvm;
namespace {
std::map<unsigned, const char *> node_names;
- //! EVT mapping to useful data for Cell SPU
- struct valtype_map_s {
- EVT valtype;
- int prefslot_byte;
- };
-
- const valtype_map_s valtype_map[] = {
- { MVT::i1, 3 },
- { MVT::i8, 3 },
- { MVT::i16, 2 },
- { MVT::i32, 0 },
- { MVT::f32, 0 },
- { MVT::i64, 0 },
- { MVT::f64, 0 },
- { MVT::i128, 0 }
- };
-
- const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
-
- const valtype_map_s *getValueTypeMapEntry(EVT VT) {
- const valtype_map_s *retval = 0;
-
- for (size_t i = 0; i < n_valtype_map; ++i) {
- if (valtype_map[i].valtype == VT) {
- retval = valtype_map + i;
- break;
- }
- }
-
-#ifndef NDEBUG
- if (retval == 0) {
- report_fatal_error("getValueTypeMapEntry returns NULL for " +
- Twine(VT.getEVTString()));
- }
-#endif
+ // Byte offset of the preferred slot (counted from the MSB)
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
return retval;
}
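
prefslotOffset() above collapses the old valtype_map table into three cases: i1 and i8 sit 3 bytes into the preferred (most-significant) word of the 16-byte register, i16 sits 2 bytes in, and everything else starts at byte 0. A tiny standalone sketch (not part of the patch) of how that offset feeds the rotate amount computed later in LowerLOAD; the helper takes a bit width rather than an EVT purely for illustration:

#include <cstdio>

static int prefslotOffset(unsigned SizeInBits) {
  if (SizeInBits == 1 || SizeInBits == 8) return 3; // i1, i8
  if (SizeInBits == 16)                   return 2; // i16
  return 0;                                         // i32 and wider
}

int main() {
  // Rotate amount for an i16 that sits 6 bytes into its quadword,
  // mirroring "rotamt = (offset & 0xf) - pso" in LowerLOAD.
  int rotamt = (6 & 0xf) - prefslotOffset(16);
  if (rotamt < 0) rotamt += 16;
  printf("%d\n", rotamt); // prints 4
  return 0;
}
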
@@ -125,8 +96,6 @@ namespace {
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()),
SPUTM(TM) {
- // Fold away setcc operations if possible.
- setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
@@ -376,10 +345,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
- setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -439,9 +408,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
// These operations need to be expanded:
setOperationAction(ISD::SDIV, VT, Expand);
@@ -502,8 +471,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
- node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+ node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
+ node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -531,10 +500,20 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
//===----------------------------------------------------------------------===//
MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i16 and i32 are valid SETCC result types
- return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
- VT.getSimpleVT().SimpleTy :
- MVT::i32);
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
}
//===----------------------------------------------------------------------===//
@@ -572,113 +551,174 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ int pso = prefslotOffset(InVT);
DebugLoc dl = Op.getDebugLoc();
-
- switch (LN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -vtm->prefslot_byte;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+  // Two sanity checks
+  assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // Clean, aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
// Offset the rotate amount by the basePtr and the preferred slot
// byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
basePtr,
- DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+ DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
- // Re-emit as a v16i8 vector load
- result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+  // When the size is not greater than the alignment, we get all the data
+  // with just one load
+ if (alignment >= InVT.getSizeInBits()/8) {
// Update the chain
- the_chain = result.getValue(1);
+ the_chain = low.getValue(1);
// Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
- result.getValue(0), rotate);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+  // When the alignment is less than the size, we might need two loads
+  // (whether we do is known only at run time).
+  // TODO: if the memory address is composed only of constants, we have
+  // extra knowledge and might avoid the second load
+ else {
+    // Storage position offset from the lower 16-byte-aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // Get a register full of ones. (This implementation is a workaround: LLVM
+    // cannot handle 128-bit signed int constants.)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+    // Shift the (possible) high part right to compensate for the misalignment.
+    // If there is no high part (e.g. the value is i64 and the offset is 4),
+    // this will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+    // Shift the low part similarly
+ // TODO: add SPUISD::SHL_BYTES
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+ }
// Handle extending loads by extending the scalar result:
if (ExtType == ISD::SEXTLOAD) {
result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
@@ -702,21 +742,6 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
retops, sizeof(retops) / sizeof(retops[0]));
return result;
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
- "than UNINDEXED\n" +
- Twine((unsigned)LN->getAddressingMode()));
- /*NOTREACHED*/
- }
- }
-
- return SDValue();
}
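
The rewritten LowerLOAD above handles possibly-unaligned loads by fetching the two aligned 16-byte quadwords that cover the value, shifting each by the byte offset (SPUISD::SHL_BYTES / SPUISD::SRL_BYTES), and OR-ing the pieces together. A plain byte-level C++ sketch of the same idea, not part of the patch and independent of the SelectionDAG API; names such as loadUnaligned, shlBytes and srlBytes are illustrative:

#include <cstddef>
#include <cstdint>
#include <cstring>

static void shlBytes(uint8_t Q[16], unsigned N) { // shift toward byte 0
  std::memmove(Q, Q + N, 16 - N);
  std::memset(Q + (16 - N), 0, N);
}
static void srlBytes(uint8_t Q[16], unsigned N) { // shift away from byte 0
  std::memmove(Q + N, Q, 16 - N);
  std::memset(Q, 0, N);
}

// Read Size bytes starting at byte address Addr out of Mem.
static void loadUnaligned(const uint8_t *Mem, size_t Addr,
                          uint8_t *Out, unsigned Size) {
  size_t Base  = Addr & ~size_t(0xf);      // aligned base of the low quadword
  unsigned Off = unsigned(Addr & 0xf);     // misalignment within it
  uint8_t Low[16], High[16];
  std::memcpy(Low,  Mem + Base,      16);  // first aligned 16-byte load
  std::memcpy(High, Mem + Base + 16, 16);  // second aligned 16-byte load
  shlBytes(Low, Off);                      // value now starts at byte 0 of Low
  srlBytes(High, 16 - Off);                // zeroed entirely when Off == 0
  for (unsigned i = 0; i != 16; ++i)
    Low[i] |= High[i];                     // merge the two halves
  std::memcpy(Out, Low, Size);
}

A caller reading a misaligned i64 would pass Size == 8; the generated DAG performs the same merge on 128-bit values with shift-by-bytes nodes instead of memmove, and always issues both quadword loads because the offset is only known at run time.
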
/// Custom lower stores for CellSPU
@@ -734,93 +759,103 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+  // Get pointerinfos to the memory chunk(s) that the store will touch
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+  // Two sanity checks
+  assert( SN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // Clean, aligned stores can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
- switch (SN->getAddressingMode()) {
- case ISD::UNINDEXED: {
- // The vector type we really want to load from the 16-byte chunk.
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if (alignment == 16) {
- ConstantSDNode *CN;
- // Special cases for a known aligned load to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
+ if ((offset & ~0xf) > 0) {
basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
- DAG.getConstant(0, PtrVT));
+ DAG.getConstant((offset & ~0xf), PtrVT));
}
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- // Load the memory to which to store.
- alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- SN->getSrcValue(), SN->getSrcValueOffset(),
- SN->isVolatile(), SN->isNonTemporal(), 16);
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+
+  // If the store does not cross a 16-byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
// Update the chain
- the_chain = alignLoadVec.getValue(1);
+ the_chain = low.getValue(1);
- LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ LoadSDNode *LN = cast<LoadSDNode>(low);
SDValue theValue = SN->getValue();
- SDValue result;
if (StVT != VT
&& (theValue.getOpcode() == ISD::AssertZext
@@ -844,48 +879,114 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, alignLoadVec,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, dl, result, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ lowMemPtr,
LN->isVolatile(), LN->isNonTemporal(),
- LN->getAlignment());
-
-#if 0 && !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- const SDValue &currentRoot = DAG.getRoot();
-
- DAG.setRoot(result);
- errs() << "------- CellSPU:LowerStore result:\n";
- DAG.dump();
- errs() << "-------\n";
- DAG.setRoot(currentRoot);
- }
-#endif
-
- return result;
- /*UNREACHED*/
- }
- case ISD::PRE_INC:
- case ISD::PRE_DEC:
- case ISD::POST_INC:
- case ISD::POST_DEC:
- case ISD::LAST_INDEXED_MODE:
- {
- report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
- "than UNINDEXED\n" +
- Twine((unsigned)SN->getAddressingMode()));
- /*NOTREACHED*/
- }
+ 16);
+
+ }
+  // Do the store when it might cross a 16-byte memory access boundary.
+  else {
+    // TODO: issue a warning if SN->isVolatile() == true? This is likely not
+    // what the user wanted.
+
+    // Address offset from the nearest lower 16-byte-aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+    // Get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+    // Create the 128-bit masks that have ones where the data to store is
+    // located.
+    SDValue lowmask, himask;
+    // If the value to store doesn't fill up an entire 128 bits, zero out
+    // the trailing bits of the mask so that only the value we want to store
+    // is masked in.
+    // This happens e.g. for a store of i32 with alignment 2.
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+    // This will be zero if no data goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+    // Merge the old data and the new data and store the results.
+    // Need to convert vectors to integers here, since 'OR'ing floats asserts.
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
}
- return SDValue();
+ return result;
}
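
The unaligned path of LowerSTORE above is a read-modify-write: load both aligned quadwords, clear the byte lanes the new value will occupy (the lowmask/himask built from a register of ones), OR in the shifted value, and write both quadwords back. A byte-level C++ sketch of the same idea, not part of the patch; because plain C++ can address individual bytes it skips the explicit 128-bit masks, and the name storeUnaligned is illustrative:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Write Size bytes of Val to byte address Addr in Mem, possibly crossing a
// 16-byte boundary, by rewriting the two aligned quadwords that cover it.
static void storeUnaligned(uint8_t *Mem, size_t Addr,
                           const uint8_t *Val, unsigned Size) {
  size_t Base  = Addr & ~size_t(0xf);
  unsigned Off = unsigned(Addr & 0xf);
  uint8_t Low[16], High[16];
  std::memcpy(Low,  Mem + Base,      16);   // load the old contents
  std::memcpy(High, Mem + Base + 16, 16);
  for (unsigned i = 0; i != Size; ++i) {    // overwrite only the Size bytes
    unsigned Pos = Off + i;                 // position in the 32-byte window
    if (Pos < 16) Low[Pos]       = Val[i];  // part going to the low quadword
    else          High[Pos - 16] = Val[i];  // part spilling into the high one
  }
  std::memcpy(Mem + Base,      Low,  16);   // write both quadwords back
  std::memcpy(Mem + Base + 16, High, 16);
}

Note that a volatile store lowered this way still reads and rewrites bytes the program never asked to touch, which is what the TODO about SN->isVolatile() in the patch alludes to.
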
//! Generate the address of a constant pool entry.
@@ -993,7 +1094,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(dbits, MVT::i64);
SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
}
return SDValue();
@@ -1013,9 +1114,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
- unsigned ArgOffset = SPUFrameInfo::minStackSize();
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1080,7 +1181,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// or we're forced to do vararg
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
ArgOffset += StackSlotSize;
}
@@ -1091,8 +1193,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// vararg handling:
if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
static const unsigned ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
@@ -1117,9 +1219,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
@@ -1163,14 +1265,14 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ *DAG.getContext());
// FIXME: allow for other calling conventions
CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
+
const unsigned NumArgRegs = ArgLocs.size();
@@ -1184,7 +1286,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Figure out which arguments are going to go in registers, and which in
// memory.
- unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
unsigned ArgRegIdx = 0;
// Keep track of registers passing arguments
@@ -1219,7 +1321,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
ArgOffset += StackSlotSize;
}
@@ -1230,7 +1333,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Accumulate how many bytes are to be pushed on the stack, including the
// linkage area, and parameter passing area. According to the SPU ABI,
// we minimally need space for [LR] and [SP].
- unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
// Insert a call sequence start
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
@@ -1311,7 +1414,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
&Ops[0], Ops.size());
InFlag = Chain.getValue(1);
@@ -1334,7 +1437,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// If the call has results, copy the values out of the ret val registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
-
+
SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
InFlag);
Chain = Val.getValue(1);
@@ -1567,7 +1670,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
break;
}
@@ -1577,7 +1680,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
&& "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
break;
}
@@ -1587,7 +1690,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SmallVector<SDValue, 8> Ops;
Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
case MVT::v8i16: {
@@ -1621,7 +1724,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et. al.
SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Val, Val, Val, Val));
} else {
@@ -1650,7 +1753,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
LO32C, LO32C, LO32C, LO32C));
}
@@ -1658,7 +1761,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Create upper vector if not a special pattern
if (!upper_special) {
SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
HI32C, HI32C, HI32C, HI32C));
}
@@ -1735,14 +1838,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
unsigned PrevElt = 0;
- unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+  int rotamt = 0;
EVT maskVT; // which of the c?d instructions to use
if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
+ maskVT = MVT::v16i8;
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
maskVT = MVT::v8i16;
@@ -1758,7 +1861,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned i = 0; i != MaxElts; ++i) {
if (SVN->getMaskElt(i) < 0)
continue;
-
+
unsigned SrcElt = SVN->getMaskElt(i);
if (monotonic) {
@@ -1782,13 +1885,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
- if (SrcElt == 0)
- V0Elt = i;
} else {
rotate = false;
}
- } else if (i == 0) {
- // First time through, need to keep track of previous element
+      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+        // First time or after a "wrap around"
+        rotamt = SrcElt - i;
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
@@ -1806,15 +1908,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
} else if (rotate) {
- int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
+ if (rotamt < 0)
+      rotamt += MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
V1, DAG.getConstant(rotamt, MVT::i16));
} else {
@@ -1999,7 +2102,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(scaleShift, MVT::i32));
}
- vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
// Replicate the bytes starting at byte 0 across the entire vector (for
// consistency with the notion of a unified register set)
@@ -2069,7 +2172,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getRegister(SPU::R1, PtrVT),
DAG.getConstant(Offset, PtrVT));
// widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
128/ VT.getVectorElementType().getSizeInBits());
SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
@@ -2077,7 +2180,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(SPUISD::SHUFB, dl, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
VecOp,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
return result;
}
@@ -2197,12 +2300,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
ConstVec = Op.getOperand(0);
Arg = Op.getOperand(1);
if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
} else {
ConstVec = Op.getOperand(1);
Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
ConstVec = ConstVec.getOperand(0);
}
}
@@ -2243,7 +2346,7 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
@@ -2419,7 +2522,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
// Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
// selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
SDValue lhsHi32 =
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2453,7 +2556,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
ISD::SETGT));
}
- SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
SDValue rhsHi32 =
DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2567,7 +2670,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
// Type to truncate to
EVT VT = Op.getValueType();
MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
@@ -2575,7 +2678,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
unsigned maskHigh = 0x08090a0b;
unsigned maskLow = 0x0c0d0e0f;
@@ -2616,6 +2719,12 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType().getSimpleVT();
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
// The type to extend to needs to be a i128 and
// the type to extend from needs to be i64 or i32.
assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
@@ -2640,12 +2749,17 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
DAG.getConstant(31, MVT::i32));
+  // Reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
// Shuffle bytes - Copy the sign bits into the upper 64 bits
// and the input value into the lower 64 bits.
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
}
//! Custom (target-specific) lowering entry point
@@ -2903,8 +3017,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
}
break;
}
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
@@ -2982,6 +3096,38 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
return TargetLowering::getConstraintType(ConstraintLetter);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+  // FIXME: It seems the supported constraint letters were just copied
+  // from PPC, as the following doesn't correspond to the GCC docs.
+  // I'm leaving it as is until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const
@@ -3086,3 +3232,28 @@ SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The SPU target isn't yet aware of offsets.
return false;
}
+
+// Can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // ceqi, cgti, etc. all take an s10 operand.
+ return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                         const Type *) const {
+
+  // A-form: 18-bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+  // D-form: reg + 14-bit offset
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg+reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+ return true;
+
+ return false;
+}
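
A note on the two hooks added above: they encode the SPU immediate ranges, namely that compare immediates must fit the signed 10-bit field of ceqi/cgti and that D-form memory operands take a signed 14-bit offset. A minimal standalone sketch of the same range checks (the function names are illustrative; only llvm::isInt<> from llvm/Support/MathExtras.h is assumed):

    #include <cstdint>
    #include "llvm/Support/MathExtras.h"

    // Sketch of the ranges the hooks above accept.
    bool fitsCmpImmediate(int64_t Imm) { return llvm::isInt<10>(Imm); }  // -512 .. 511
    bool fitsDFormOffset(int64_t Offs) { return llvm::isInt<14>(Offs); } // -8192 .. 8191
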
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
index 6d3c90b..95d44af 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -41,8 +41,9 @@ namespace llvm {
CNTB, ///< Count leading ones in bytes
PREFSLOT2VEC, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
- SHLQUAD_L_BITS, ///< Rotate quad left, by bits
- SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
VEC_ROTL, ///< Vector rotate left
VEC_ROTR, ///< Vector rotate right
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
@@ -129,6 +130,11 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -170,6 +176,19 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+ /// After allocating this many registers, the allocator should feel
+ /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non-callee-saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                         MachineFunction &MF) const {
+ return 50;
+ }
};
}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
index 26d6b4f..f9e6c72 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -16,6 +16,7 @@
#include "SPUInstrBuilder.h"
#include "SPUTargetMachine.h"
#include "SPUGenInstrInfo.inc"
+#include "SPUHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
@@ -129,7 +140,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
if (RC == SPU::GPRCRegisterClass) {
opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
} else if (RC == SPU::R64CRegisterClass) {
@@ -164,7 +175,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
unsigned opc;
- bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
if (RC == SPU::GPRCRegisterClass) {
opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
} else if (RC == SPU::R64CRegisterClass) {
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
index 191e55d..e5e9148 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,6 +32,10 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
index ca0fe00..25f6fd0 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -416,7 +416,7 @@ multiclass ImmLoadAddress
def lo: ILARegInst<R32C, symbolLo, imm18>;
def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
- [/* no pattern */]>;
+ [(set R32C:$rT, imm18:$val)]>;
}
defm ILA : ImmLoadAddress;
@@ -1167,10 +1167,10 @@ class XSHWRegInst<RegisterClass rclass>:
[(set rclass:$rDest, (sext R16C:$rSrc))]>;
multiclass ExtendHalfwordWord {
- def v4i32: XSHWVecInst<v4i32, v8i16>;
-
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
def r16: XSHWRegInst<R32C>;
-
+
def r32: XSHWInRegInst<R32C,
[(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
@@ -1385,59 +1385,6 @@ class ORRegInst<RegisterClass rclass>:
ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
[(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
-// ORCvtForm: OR conversion form
-//
-// This is used to "convert" the preferred slot to its vector equivalent, as
-// well as convert a vector back to its preferred slot.
-//
-// These are effectively no-ops, but need to exist for proper type conversion
-// and type coercion.
-
-class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
- : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
- bits<7> RA;
- bits<7> RT;
-
- let Pattern = pattern;
-
- let Inst{0-10} = 0b10000010000;
- let Inst{11-17} = RA;
- let Inst{18-24} = RA;
- let Inst{25-31} = RT;
-}
-
-class ORPromoteScalar<RegisterClass rclass>:
- ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
-
-class ORExtractElt<RegisterClass rclass>:
- ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
-
-/* class ORCvtRegGPRC<RegisterClass rclass>:
- ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
-
-/* class ORCvtGPRCReg<RegisterClass rclass>:
- ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
-
-class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
-
-class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
-
-class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
- ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
-
-class ORCvtGPRCVec:
- ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
-
-class ORCvtVecGPRC:
- ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
-
-class ORCvtVecVec:
- ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
multiclass BitwiseOr
{
@@ -1468,119 +1415,48 @@ multiclass BitwiseOr
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
-
- // scalar->vector promotion, prefslot2vec:
- def v16i8_i8: ORPromoteScalar<R8C>;
- def v8i16_i16: ORPromoteScalar<R16C>;
- def v4i32_i32: ORPromoteScalar<R32C>;
- def v2i64_i64: ORPromoteScalar<R64C>;
- def v4f32_f32: ORPromoteScalar<R32FP>;
- def v2f64_f64: ORPromoteScalar<R64FP>;
-
- // vector->scalar demotion, vec2prefslot:
- def i8_v16i8: ORExtractElt<R8C>;
- def i16_v8i16: ORExtractElt<R16C>;
- def i32_v4i32: ORExtractElt<R32C>;
- def i64_v2i64: ORExtractElt<R64C>;
- def f32_v4f32: ORExtractElt<R32FP>;
- def f64_v2f64: ORExtractElt<R64FP>;
-
- // Conversion from vector to GPRC
- def i128_vec: ORCvtVecGPRC;
-
- // Conversion from GPRC to vector
- def vec_i128: ORCvtGPRCVec;
-
-/*
- // Conversion from register to GPRC
- def i128_r64: ORCvtRegGPRC<R64C>;
- def i128_f64: ORCvtRegGPRC<R64FP>;
- def i128_r32: ORCvtRegGPRC<R32C>;
- def i128_f32: ORCvtRegGPRC<R32FP>;
- def i128_r16: ORCvtRegGPRC<R16C>;
- def i128_r8: ORCvtRegGPRC<R8C>;
-
- // Conversion from GPRC to register
- def r64_i128: ORCvtGPRCReg<R64C>;
- def f64_i128: ORCvtGPRCReg<R64FP>;
- def r32_i128: ORCvtGPRCReg<R32C>;
- def f32_i128: ORCvtGPRCReg<R32FP>;
- def r16_i128: ORCvtGPRCReg<R16C>;
- def r8_i128: ORCvtGPRCReg<R8C>;
-*/
-/*
- // Conversion from register to R32C:
- def r32_r16: ORCvtFormRegR32<R16C>;
- def r32_r8: ORCvtFormRegR32<R8C>;
-
- // Conversion from R32C to register
- def r32_r16: ORCvtFormR32Reg<R16C>;
- def r32_r8: ORCvtFormR32Reg<R8C>;
-*/
-
- // Conversion from R64C to register:
- def r32_r64: ORCvtFormR64Reg<R32C>;
- // def r16_r64: ORCvtFormR64Reg<R16C>;
- // def r8_r64: ORCvtFormR64Reg<R8C>;
-
- // Conversion to R64C from register:
- def r64_r32: ORCvtFormRegR64<R32C>;
- // def r64_r16: ORCvtFormRegR64<R16C>;
- // def r64_r8: ORCvtFormRegR64<R8C>;
-
- // bitconvert patterns:
- def r32_f32: ORCvtFormR32Reg<R32FP,
- [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
- def f32_r32: ORCvtFormRegR32<R32FP,
- [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
-
- def r64_f64: ORCvtFormR64Reg<R64FP,
- [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
- def f64_r64: ORCvtFormRegR64<R64FP,
- [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
}
defm OR : BitwiseOr;
-// scalar->vector promotion patterns (preferred slot to vector):
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
- (ORv16i8_i8 R8C:$rA)>;
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
- (ORv8i16_i16 R16C:$rA)>;
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
- (ORv4i32_i32 R32C:$rA)>;
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
- (ORv2i64_i64 R64C:$rA)>;
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
- (ORv4f32_f32 R32FP:$rA)>;
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
- (ORv2f64_f64 R64FP:$rA)>;
-
-// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
-// known as converting the vector back to its preferred slot
-
-def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
- (ORi8_v16i8 VECREG:$rA)>;
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
-def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
- (ORi16_v8i16 VECREG:$rA)>;
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
-def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
- (ORi32_v4i32 VECREG:$rA)>;
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
-def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
- (ORi64_v2i64 VECREG:$rA)>;
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
-def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
- (ORf32_v4f32 VECREG:$rA)>;
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
-def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
- (ORf64_v2f64 VECREG:$rA)>;
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
// Load Register: This is an assembler alias for a bitwise OR of a register
// against itself. It's here because it brings some clarity to assembly
@@ -2093,7 +1969,7 @@ defm EQV: BitEquivalence;
class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
- IntegerOp, pattern>;
+ ShuffleOp, pattern>;
class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
@@ -2134,7 +2010,7 @@ defm SHUFB : ShuffleBytes;
class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class SHLHVecInst<ValueType vectype>:
SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
@@ -2156,7 +2032,7 @@ defm SHLH : ShiftLeftHalfword;
class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class SHLHIVecInst<ValueType vectype>:
SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2182,7 +2058,7 @@ def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
multiclass ShiftLeftWord
{
@@ -2201,7 +2077,7 @@ defm SHL: ShiftLeftWord;
class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
multiclass ShiftLeftWordImm
{
@@ -2230,7 +2106,7 @@ defm SHLI : ShiftLeftWordImm;
class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBIVecInst<ValueType vectype>:
SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2259,7 +2135,7 @@ defm SHLQBI : ShiftLeftQuadByBits;
// enforcement, whereas with SHLQBI, we have to "take it on faith."
class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBIIVecInst<ValueType vectype>:
SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2283,7 +2159,7 @@ defm SHLQBII : ShiftLeftQuadByBitsImm;
class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYVecInst<ValueType vectype>:
SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2306,7 +2182,7 @@ defm SHLQBY: ShiftLeftQuadBytes;
class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYIVecInst<ValueType vectype>:
SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
@@ -2330,7 +2206,7 @@ defm SHLQBYI : ShiftLeftQuadBytesImm;
class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class SHLQBYBIVecInst<ValueType vectype>:
SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2359,7 +2235,7 @@ defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTHVecInst<ValueType vectype>:
ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
@@ -2386,7 +2262,7 @@ def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTHIVecInst<ValueType vectype>:
ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
@@ -2413,7 +2289,7 @@ def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTVecInst<ValueType vectype>:
ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2461,7 +2337,7 @@ def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
@@ -2491,12 +2367,15 @@ defm ROTI : RotateLeftWordImm;
class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
-class ROTQBYVecInst<ValueType vectype>:
- ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
multiclass RotateQuadLeftByBytes
{
@@ -2506,6 +2385,7 @@ multiclass RotateQuadLeftByBytes
def v4f32: ROTQBYVecInst<v4f32>;
def v2i64: ROTQBYVecInst<v2i64>;
def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
}
defm ROTQBY: RotateQuadLeftByBytes;
@@ -2516,12 +2396,15 @@ defm ROTQBY: RotateQuadLeftByBytes;
class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
class ROTQBYIVecInst<ValueType vectype>:
- ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
- [(set (vectype VECREG:$rT),
- (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+ ROTQBYIGenInst<vectype, VECREG>;
multiclass RotateQuadByBytesImm
{
@@ -2531,6 +2414,7 @@ multiclass RotateQuadByBytesImm
def v4f32: ROTQBYIVecInst<v4f32>;
def v2i64: ROTQBYIVecInst<v2i64>;
def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
}
defm ROTQBYI: RotateQuadByBytesImm;
@@ -2539,7 +2423,7 @@ defm ROTQBYI: RotateQuadByBytesImm;
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00110011100, OOL, IOL,
"rotqbybi\t$rT, $rA, $shift",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
@@ -2564,7 +2448,7 @@ defm ROTQBYBI : RotateQuadByBytesByBitshift;
class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBIVecInst<ValueType vectype>:
ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2589,7 +2473,7 @@ defm ROTQBI: RotateQuadByBitCount;
class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
PatLeaf pred>:
@@ -2624,7 +2508,7 @@ defm ROTQBII : RotateQuadByBitCountImm;
class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTHMv8i16:
ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2666,7 +2550,7 @@ def : Pat<(srl R16C:$rA, R8C:$rB),
class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTHMIv8i16:
ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
@@ -2697,7 +2581,7 @@ def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
// ROTM v4i32 form: See the ROTHM v8i16 comments.
class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
def ROTMv4i32:
ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2732,7 +2616,7 @@ def : Pat<(srl R32C:$rA, R8C:$rB),
// ROTMI v4i32 form: See the comment for ROTHM v8i16.
def ROTMIv4i32:
RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotateShift,
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
[(set (v4i32 VECREG:$rT),
(SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
@@ -2745,7 +2629,7 @@ def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
// ROTMI r32 form: know how to complement the immediate value.
def ROTMIr32:
RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
- "rotmi\t$rT, $rA, $val", RotateShift,
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
[(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
def : Pat<(srl R32C:$rA, (i16 imm:$val)),
@@ -2762,7 +2646,7 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)),
class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYVecInst<ValueType vectype>:
ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2785,9 +2669,13 @@ multiclass RotateQuadBytes
defm ROTQMBY : RotateQuadBytes;
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYIVecInst<ValueType vectype>:
ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2827,7 +2715,7 @@ defm ROTQMBYI : RotateQuadBytesImm;
class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBYBIVecInst<ValueType vectype>:
ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2839,6 +2727,8 @@ multiclass RotateMaskQuadByBitCount
def v8i16: ROTQMBYBIVecInst<v8i16>;
def v4i32: ROTQMBYBIVecInst<v4i32>;
def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
}
defm ROTQMBYBI: RotateMaskQuadByBitCount;
@@ -2850,7 +2740,7 @@ defm ROTQMBYBI: RotateMaskQuadByBitCount;
class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBIVecInst<ValueType vectype>:
ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
@@ -2873,13 +2763,19 @@ multiclass RotateMaskQuadByBits
defm ROTQMBI: RotateMaskQuadByBits;
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
+
+
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate quad and mask by bits, immediate
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftQuad, pattern>;
class ROTQMBIIVecInst<ValueType vectype>:
ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
@@ -2907,7 +2803,7 @@ defm ROTQMBII: RotateMaskQuadByBitsImm;
def ROTMAHv8i16:
RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotateShift,
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
@@ -2923,7 +2819,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
def ROTMAHr16:
RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
- "rotmah\t$rT, $rA, $rB", RotateShift,
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(sra R16C:$rA, R32C:$rB),
@@ -2939,7 +2835,7 @@ def : Pat<(sra R16C:$rA, R8C:$rB),
def ROTMAHIv8i16:
RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
- "rotmahi\t$rT, $rA, $val", RotateShift,
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
[(set (v8i16 VECREG:$rT),
(SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
@@ -2951,7 +2847,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
def ROTMAHIr16:
RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
- "rotmahi\t$rT, $rA, $val", RotateShift,
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
[(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
def : Pat<(sra R16C:$rA, (i32 imm:$val)),
@@ -2962,7 +2858,7 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)),
def ROTMAv4i32:
RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotateShift,
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
@@ -2978,7 +2874,7 @@ def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
def ROTMAr32:
RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
- "rotma\t$rT, $rA, $rB", RotateShift,
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
def : Pat<(sra R32C:$rA, R32C:$rB),
@@ -2995,7 +2891,7 @@ def : Pat<(sra R32C:$rA, R8C:$rB),
class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011110000, OOL, IOL,
"rotmai\t$rT, $rA, $val",
- RotateShift, pattern>;
+ RotShiftVec, pattern>;
class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
@@ -4010,7 +3906,7 @@ def FCGTf32 :
"fcgt\t$rT, $rA, $rB", SPrecFP,
[(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
-def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
(FCGTf32 R32FP:$rA, R32FP:$rB)>;
def FCMGTf32 :
@@ -4018,7 +3914,7 @@ def FCMGTf32 :
"fcmgt\t$rT, $rA, $rB", SPrecFP,
[(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
-def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
(FCMGTf32 R32FP:$rA, R32FP:$rB)>;
//--------------------------------------------------------------------------
@@ -4320,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
// in the odd pipeline)
//===----------------------------------------------------------------------===//
-def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
let Pattern = [];
let Inst{0-10} = 0b10000000010;
@@ -4379,30 +4275,43 @@ def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
- (ORi128_vec VECREG:$src)>;
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
- (v16i8 (ORvec_i128 GPRC:$src))>;
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
- (v8i16 (ORvec_i128 GPRC:$src))>;
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
- (v4i32 (ORvec_i128 GPRC:$src))>;
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
- (v2i64 (ORvec_i128 GPRC:$src))>;
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
- (v4f32 (ORvec_i128 GPRC:$src))>;
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
- (v2f64 (ORvec_i128 GPRC:$src))>;
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
//===----------------------------------------------------------------------===//
// Instruction patterns:
@@ -4453,11 +4362,12 @@ def : Pat<(i32 (zext R8C:$rSrc)),
// zext 8->64: Zero extend bytes to double words
def : Pat<(i64 (zext R8C:$rSrc)),
- (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
- (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
0x4),
(ILv4i32 0x0),
- (FSMBIv4i32 0x0f0f)))>;
+ (FSMBIv4i32 0x0f0f)), R64C)>;
// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
def : Pat<(i16 (anyext R8C:$rSrc)),
@@ -4465,7 +4375,7 @@ def : Pat<(i16 (anyext R8C:$rSrc)),
// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
def : Pat<(i32 (anyext R8C:$rSrc)),
- (ORIi8i32 R8C:$rSrc, 0)>;
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
// sext 16->64: Sign extend halfword to double word
def : Pat<(sext_inreg R64C:$rSrc, i16),
@@ -4489,7 +4399,7 @@ def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
// anyext 16->32: Extend 16->32 bits, irrespective of sign
def : Pat<(i32 (anyext R16C:$rSrc)),
- (ORIi16i32 R16C:$rSrc, 0)>;
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
//===----------------------------------------------------------------------===//
// Truncates:
@@ -4498,61 +4408,61 @@ def : Pat<(i32 (anyext R16C:$rSrc)),
//===----------------------------------------------------------------------===//
def : Pat<(i8 (trunc GPRC:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
def : Pat<(i8 (trunc R64C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
def : Pat<(i8 (trunc R32C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv4i32_i32 R32C:$src),
- (ORv4i32_i32 R32C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
def : Pat<(i8 (trunc R16C:$src)),
- (ORi8_v16i8
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv8i16_i16 R16C:$src),
- (ORv8i16_i16 R16C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
def : Pat<(i16 (trunc GPRC:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
def : Pat<(i16 (trunc R64C:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
def : Pat<(i16 (trunc R32C:$src)),
- (ORi16_v8i16
+ (COPY_TO_REGCLASS
(SHUFBv4i32_m32
- (ORv4i32_i32 R32C:$src),
- (ORv4i32_i32 R32C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
def : Pat<(i32 (trunc GPRC:$src)),
- (ORi32_v4i32
+ (COPY_TO_REGCLASS
(SHUFBgprc GPRC:$src, GPRC:$src,
- (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
def : Pat<(i32 (trunc R64C:$src)),
- (ORi32_v4i32
+ (COPY_TO_REGCLASS
(SHUFBv2i64_m32
- (ORv2i64_i64 R64C:$src),
- (ORv2i64_i64 R64C:$src),
- (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
//===----------------------------------------------------------------------===//
// Address generation: SPU, like PPC, has to split addresses into high and
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 25ba88a..99aaeb0 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -24,9 +24,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- // Has leb128, .loc and .file
+ // Has leb128
HasLEB128 = true;
- HasDotLocAndDotFile = true;
SupportsDebugInformation = true;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
index 647da30..a6e621f 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
@@ -19,16 +19,16 @@ def SPU_GenControl : SDTypeProfile<1, 1, []>;
def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// Operand constraints:
//===----------------------------------------------------------------------===//
def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
// Operand type constraints for vector shuffle/permute operations
@@ -83,10 +83,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
// SPUISelLowering.h):
def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
-// Shift left quadword by bits and bytes
-def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
-def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
-
// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
@@ -105,6 +101,12 @@ def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
SPUvecshift_type>;
+// Shift the entire quad left by bytes/bits. Zeros are shifted in on the right.
+// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128.
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
// SPU form select mask for bytes, immediate
def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
@@ -154,4 +156,4 @@ class NoEncode<string E> {
//===----------------------------------------------------------------------===//
def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 0000000..e2bd2d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual-issue requirements. The actual dual-issue scheduling is
+// done (TODO: nowhere, currently).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert(isEvenPlace && "basic block starts at an odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+    // pad: odd(wrong), even(wrong), ...
+    // to:  nop(corr), odd(corr), even(corr)...
+    if (isEvenPlace && this_optype == odd && next_optype == even) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+    // pad: even(wrong), odd(wrong), ...
+    // to:  lnop(corr), even(corr), odd(corr)...
+    else if (!isEvenPlace && this_optype == even && next_optype == odd) {
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+  // pad the basic block end
+  if (!isEvenPlace) {
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
+  switch (FUs) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+    default: retval = pseudo;
+             assert(false && "got unknown FuncUnit\n");
+ break;
+ };
+ return retval;
+}
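
The new pass above keys everything off slot parity: an odd-pipeline instruction that would land in an even slot while an even-pipeline instruction follows gets an ENOP in front of it, and the mirrored case gets an LNOP. A minimal sketch of that parity rule in isolation (the Slot names and the driver loop are illustrative and skip the pseudo-instruction handling of the real pass):

    #include <cstdio>
    #include <vector>

    enum Slot { Pseudo, Even, Odd };  // mirrors the pass's SPUOpPlace

    int main() {
      std::vector<Slot> insns = {Odd, Even, Even, Odd};
      bool evenPlace = true;  // functions start on an even (aligned) slot
      for (size_t i = 0; i < insns.size(); ++i) {
        Slot next = (i + 1 < insns.size()) ? insns[i + 1] : Pseudo;
        if (evenPlace && insns[i] == Odd && next == Even) {
          std::puts("insert ENOP");   // the nop fills the even slot
          evenPlace = false;
        } else if (!evenPlace && insns[i] == Even && next == Odd) {
          std::puts("insert LNOP");   // the lnop fills the odd slot
          evenPlace = true;
        }
        if (insns[i] != Pseudo)
          evenPlace = !evenPlace;     // the real instruction consumes its slot
      }
    }
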
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
index e1a0358..96cde51 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
@@ -143,7 +143,7 @@ def immU16 : PatLeaf<(imm), [{
def imm18 : PatLeaf<(imm), [{
// imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
int Value = (int) N->getZExtValue();
- return ((Value & ((1 << 19) - 1)) == Value);
+ return isUInt<18>(Value);
}]>;
def lo16 : PatLeaf<(imm), [{
@@ -203,7 +203,7 @@ def FPimm_sext16 : SDNodeXForm<fpimm, [{
def FPimm_u18 : SDNodeXForm<fpimm, [{
float fval = N->getValueAPF().convertToFloat();
- return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
}]>;
def fpimmSExt16 : PatLeaf<(fpimm), [{
@@ -225,7 +225,7 @@ def hi16_f32 : PatLeaf<(fpimm), [{
def fpimm18 : PatLeaf<(fpimm), [{
if (N->getValueType(0) == MVT::f32) {
uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
- return ((Value & ((1 << 19) - 1)) == Value);
+ return isUInt<18>(Value);
}
return false;
@@ -654,7 +654,11 @@ def memrr : Operand<iPTR> {
// A-form : abs (256K LSA offset)
// D-form(2): [r+I7] (7-bit signed offset + reg)
-def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr", [], []>;
-def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr", [], []>;
-def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr", [], []>;
-def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
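
The imm18/fpimm18 change above is a correctness fix rather than a cleanup: masking with ((1 << 19) - 1) accepts any value that fits in 19 bits, while isUInt<18> only accepts values that genuinely fit an 18-bit unsigned field. A small illustration (oldImm18 and newImm18 are illustrative names, not functions from the tree):

    #include <cassert>
    #include <cstdint>

    bool oldImm18(uint32_t Value) { return (Value & ((1u << 19) - 1)) == Value; }
    bool newImm18(uint32_t Value) { return Value < (1u << 18); }  // what isUInt<18> checks

    int main() {
      assert(oldImm18(0x3FFFF) && newImm18(0x3FFFF));   // largest 18-bit value: both accept
      assert(oldImm18(0x40000) && !newImm18(0x40000));  // a 19-bit value: only the old check accepts
    }
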
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
index cf71891..0bdd50a 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -18,7 +18,7 @@
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -30,7 +30,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -240,25 +240,6 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool
-SPURegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getStackSize() && needsFP(MF);
-}
-
//--------------------------------------------------------------------------
void
SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -302,7 +283,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MachineOperand &MO = MI.getOperand(OpNo);
// Offset is biased by $lr's slot at the bottom.
- Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
assert((Offset & 0xf) == 0
&& "16-byte alignment violated in eliminateFrameIndex");
@@ -329,225 +310,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
}
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void
-SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
-{
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
- assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
- unsigned AlignMask = Align - 1;
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
-
-void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS)
- const {
- // Mark LR and SP unused, since the prolog spills them to stack and
- // we don't want anyone else to spill them for us.
- //
- // Also, unless R2 is really used someday, don't spill it automatically.
- MF.getRegInfo().setPhysRegUnused(SPU::R0);
- MF.getRegInfo().setPhysRegUnused(SPU::R1);
- MF.getRegInfo().setPhysRegUnused(SPU::R2);
-
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &SPU::R32CRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
-
-
-}
-
-void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
-{
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Prepare for debug frame info.
- bool hasDebugInfo = MMI.hasDebugInfo();
- MCSymbol *FrameLabel = 0;
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- int FrameSize = MFI->getStackSize();
-
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
- if (hasDebugInfo) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
- }
-
- // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
- // for the ABI
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
- .addReg(SPU::R1);
- if (isInt<10>(FrameSize)) {
- // Spill $sp to adjusted $sp
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
- .addReg(SPU::R1);
- // Adjust $sp by required amout
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (isInt<16>(FrameSize)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(-16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
- .addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
-
- if (hasDebugInfo) {
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Show update of SP.
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == SPU::R0) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
-
- // Mark effective beginning of when frame pointer is ready.
- MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(SPU::R1);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- } else {
- // This is a leaf function -- insert a branch hint iff there are
- // sufficient number instructions in the basic block. Note that
- // this is just a best guess based on the basic block's size.
- if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- dl = MBBI->getDebugLoc();
-
- // Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
- .addSym(MMI.getContext().CreateTempSymbol());
- }
- }
-}
-
-void
-SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int FrameSize = MFI->getStackSize();
- int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
- DebugLoc dl = MBBI->getDebugLoc();
-
- assert(MBBI->getOpcode() == SPU::RET &&
- "Can only insert epilog into returning blocks");
- assert((FrameSize & 0xf) == 0
- && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
-
- // the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->adjustsStack()) {
- FrameSize = FrameSize + SPUFrameInfo::minStackSize();
- if (isInt<10>(FrameSize + LinkSlotOffset)) {
- // Reload $lr, adjust $sp by required amount
- // Note: We do this to slightly improve dual issue -- not by much, but it
- // is an opportunity for dual issue.
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(FrameSize + LinkSlotOffset)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
- .addReg(SPU::R1)
- .addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
- // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
- // $r2 to adjust $sp:
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
- .addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
- .addReg(SPU::R1)
- .addReg(SPU::R2);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
- .addImm(16)
- .addReg(SPU::R1);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
- addReg(SPU::R2)
- .addImm(16);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
- .addReg(SPU::R2)
- .addReg(SPU::R1);
- } else {
- report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
- }
- }
-}
-
unsigned
SPURegisterInfo::getRARegister() const
{
@@ -560,26 +322,16 @@ SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
return SPU::R1;
}
-void
-SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
-{
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(SPU::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
-
int
SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
// FIXME: Most probably dwarf numbers differs for Linux and Darwin
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
-int
+int
SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
{
- switch(dFormOpcode)
+ switch(dFormOpcode)
{
case SPU::AIr32: return SPU::Ar32;
case SPU::LQDr32: return SPU::LQXr32;
@@ -602,10 +354,10 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
// TODO this is already copied from PPC. Could this convenience function
// be moved to the RegScavenger class?
-unsigned
-SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
RegScavenger *RS,
- const TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
int SPAdj) const
{
assert(RS && "Register scavenging must be on");
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
index aedb769..641da04 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -33,7 +33,7 @@ namespace llvm {
public:
SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
-
+
//! Translate a register's enum value to a register number
/*!
      This method translates a register's enum value to its register number,
@@ -56,8 +56,6 @@ namespace llvm {
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
- //! Prediate: Target has dedicated frame pointer
- bool hasFP(const MachineFunction &MF) const;
//! Eliminate the call frame setup pseudo-instructions
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -65,21 +63,11 @@ namespace llvm {
//! Convert frame indicies into machine operands
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS = NULL) const;
- //! Determine the frame's layour
- void determineFrameLayout(MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
- //! Emit the function prologue
- void emitPrologue(MachineFunction &MF) const;
- //! Emit the function epilogue
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
//! Get return address register (LR, aka R0)
unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Perform target-specific stack frame setup.
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//------------------------------------------------------------------------
// New methods added:
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
index a0b581f..9cd3c23 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -32,11 +32,12 @@ def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
def ByteOp : InstrItinClass; // EVEN_UNIT
def IntegerOp : InstrItinClass; // EVEN_UNIT
def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
-def RotateShift : InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
def ImmLoad : InstrItinClass; // EVEN_UNIT
/* Note: The itinerary for the Cell SPU is somewhat contrived... */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
@@ -51,7 +52,8 @@ def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
- InstrItinData<RotateShift , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
index 0f18b7f..07c8352 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,6 +14,8 @@
#include "SPUSubtarget.h"
#include "SPU.h"
#include "SPUGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "SPURegisterInfo.h"
using namespace llvm;
@@ -34,3 +36,22 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
/// producing code for the JIT.
void SPUSubtarget::SetJITMode() {
}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ // CriticalPathRCs is the set of register classes for which
+ // anti-dependency breaking is performed.
+ // Do it for all register classes
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
index 88201c6..d7929302 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -81,9 +81,13 @@ namespace llvm {
/// properties of this subtarget.
const char *getTargetDataString() const {
return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
- "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
"-s:128:128-n32:64";
}
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
index 480ec3f..3ed7361 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeCellSPUTarget() {
}
const std::pair<unsigned, int> *
-SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
NumEntries = 1;
return &LR[0];
}
@@ -40,7 +40,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
Subtarget(TT, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
- FrameInfo(*this),
+ FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
@@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
PM.add(createSPUISelDag(*this));
return false;
}
+
+// passes to run just before printing the assembly
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ // Align instructions with nops/lnops for dual issue
+ PM.add(createSPUNopFillerPass(*this));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
index 7e02701..75abd5e 100644
--- a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -18,14 +18,14 @@
#include "SPUInstrInfo.h"
#include "SPUISelLowering.h"
#include "SPUSelectionDAGInfo.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
namespace llvm {
class PassManager;
class GlobalValue;
-class TargetFrameInfo;
+class TargetFrameLowering;
/// SPUTargetMachine
///
@@ -33,7 +33,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUSubtarget Subtarget;
const TargetData DataLayout;
SPUInstrInfo InstrInfo;
- SPUFrameInfo FrameInfo;
+ SPUFrameLowering FrameLowering;
SPUTargetLowering TLInfo;
SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
@@ -48,8 +48,8 @@ public:
virtual const SPUInstrInfo *getInstrInfo() const {
return &InstrInfo;
}
- virtual const SPUFrameInfo *getFrameInfo() const {
- return &FrameInfo;
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
}
/*!
\note Cell SPU does not support JIT today. It could support JIT at some
@@ -75,13 +75,14 @@ public:
return &DataLayout;
}
- virtual const InstrItineraryData getInstrItineraryData() const {
- return InstrItins;
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
}
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index f08559f..71d6049 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -358,6 +358,7 @@ std::string CppWriter::getCppName(const Type* Ty) {
case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
default:
error("Invalid primitive type");
break;
@@ -1563,11 +1564,25 @@ void CppWriter::printFunctionUses(const Function* F) {
// If the operand references a GVal or Constant, make a note of it
if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
gvs.insert(GV);
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->hasInitializer())
- consts.insert(GVar->getInitializer());
- } else if (Constant* C = dyn_cast<Constant>(operand))
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand)) {
consts.insert(C);
+ for (unsigned j = 0; j < C->getNumOperands(); ++j) {
+ // If the operand references a GVal or Constant, make a note of it
+ Value* operand = C->getOperand(j);
+ printType(operand->getType());
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GenerationType != GenFunction)
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ }
+ }
+ }
}
}
}
@@ -1590,7 +1605,7 @@ void CppWriter::printFunctionUses(const Function* F) {
printVariableHead(F);
}
-// Print the constants found
+ // Print the constants found
nl(Out) << "// Constant Definitions"; nl(Out);
for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
E = consts.end(); I != E; ++I) {
@@ -1600,11 +1615,13 @@ void CppWriter::printFunctionUses(const Function* F) {
// Process the global variables definitions now that all the constants have
// been emitted. These definitions just couple the gvars with their constant
// initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
- printVariableBody(GV);
+ if (GenerationType != GenFunction) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
}
}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..87e7cb5
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmParser
+ MBlazeAsmLexer.cpp
+ MBlazeAsmParser.cpp
+ )
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
new file mode 100644
index 0000000..1903796
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -0,0 +1,127 @@
+//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#include <string>
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+ class MBlazeBaseAsmLexer : public TargetAsmLexer {
+ const MCAsmInfo &AsmInfo;
+
+ const AsmToken &lexDefinite() {
+ return getLexer()->Lex();
+ }
+
+ AsmToken LexTokenUAL();
+ protected:
+ typedef std::map <std::string, unsigned> rmap_ty;
+
+ rmap_ty RegisterMap;
+
+ void InitRegisterMap(const TargetRegisterInfo *info) {
+ unsigned numRegs = info->getNumRegs();
+
+ for (unsigned i = 0; i < numRegs; ++i) {
+ const char *regName = info->getName(i);
+ if (regName)
+ RegisterMap[regName] = i;
+ }
+ }
+
+ unsigned MatchRegisterName(StringRef Name) {
+ rmap_ty::iterator iter = RegisterMap.find(Name.str());
+ if (iter != RegisterMap.end())
+ return iter->second;
+ else
+ return 0;
+ }
+
+ AsmToken LexToken() {
+ if (!Lexer) {
+ SetError(SMLoc(), "No MCAsmLexer installed");
+ return AsmToken(AsmToken::Error, "", 0);
+ }
+
+ switch (AsmInfo.getAssemblerDialect()) {
+ default:
+ SetError(SMLoc(), "Unhandled dialect");
+ return AsmToken(AsmToken::Error, "", 0);
+ case 0:
+ return LexTokenUAL();
+ }
+ }
+ public:
+ MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : TargetAsmLexer(T), AsmInfo(MAI) {
+ }
+ };
+
+ class MBlazeAsmLexer : public MBlazeBaseAsmLexer {
+ public:
+ MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI)
+ : MBlazeBaseAsmLexer(T, MAI) {
+ std::string tripleString("mblaze-unknown-unknown");
+ std::string featureString;
+ OwningPtr<const TargetMachine>
+ targetMachine(T.createTargetMachine(tripleString, featureString));
+ InitRegisterMap(targetMachine->getRegisterInfo());
+ }
+ };
+}
+
+AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
+ const AsmToken &lexedToken = lexDefinite();
+
+ switch (lexedToken.getKind()) {
+ default:
+ return AsmToken(lexedToken);
+ case AsmToken::Error:
+ SetError(Lexer->getErrLoc(), Lexer->getErr());
+ return AsmToken(lexedToken);
+ case AsmToken::Identifier:
+ {
+ std::string upperCase = lexedToken.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ StringRef lowerRef(lowerCase);
+
+ unsigned regID = MatchRegisterName(lowerRef);
+
+ if (regID) {
+ return AsmToken(AsmToken::Register,
+ lexedToken.getString(),
+ static_cast<int64_t>(regID));
+ } else {
+ return AsmToken(lexedToken);
+ }
+ }
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer() {
+ RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget);
+}
+
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
new file mode 100644
index 0000000..524f33d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -0,0 +1,568 @@
+//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeISelLowering.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+namespace {
+struct MBlazeOperand;
+
+class MBlazeAsmParser : public TargetAsmParser {
+ MCAsmParser &Parser;
+ TargetMachine &TM;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ MBlazeOperand *ParseRegister(unsigned &RegNo);
+ MBlazeOperand *ParseImmediate();
+ MBlazeOperand *ParseFsl();
+ MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "MBlazeGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine
+/// instruction.
+struct MBlazeOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Register,
+ Memory,
+ Fsl
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ union {
+ struct {
+ const char *Data;
+ unsigned Length;
+ } Tok;
+
+ struct {
+ unsigned RegNum;
+ } Reg;
+
+ struct {
+ const MCExpr *Val;
+ } Imm;
+
+ struct {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ } Mem;
+
+ struct {
+ const MCExpr *Val;
+ } FslImm;
+ };
+
+ MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Memory:
+ Mem = o.Mem;
+ break;
+ case Fsl:
+ FslImm = o.FslImm;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getFslImm() const {
+ assert(Kind == Fsl && "Invalid access!");
+ return FslImm.Val;
+ }
+
+ unsigned getMemBase() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Base;
+ }
+
+ const MCExpr* getMemOff() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Off;
+ }
+
+ unsigned getMemOffReg() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.OffReg;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { return Kind == Memory; }
+ bool isFsl() const { return Kind == Fsl; }
+ bool isReg() const { return Kind == Register; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addFslOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getFslImm());
+ }
+
+ void addMemOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::CreateReg(getMemBase()));
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ Inst.addOperand(MCOperand::CreateReg(RegOff));
+ else
+ addExpr(Inst, getMemOff());
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
+ MBlazeOperand *Op = new MBlazeOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Fsl);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.Off = Off;
+ Op->Mem.OffReg = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S,
+ SMLoc E) {
+ MBlazeOperand *Op = new MBlazeOperand(Memory);
+ Op->Mem.Base = Base;
+ Op->Mem.OffReg = Off;
+ Op->Mem.Off = 0;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void MBlazeOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Memory: {
+ OS << "<memory R";
+ OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase());
+ OS << ", ";
+
+ unsigned RegOff = getMemOffReg();
+ if (RegOff)
+ OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff);
+ else
+ OS << getMemOff();
+ OS << ">";
+ }
+ break;
+ case Fsl:
+ getFslImm()->print(OS);
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+//
+bool MBlazeAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ MCInst Inst;
+ SMLoc ErrorLoc;
+ unsigned ErrorInfo;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Operands.size() != 4)
+ return 0;
+
+ MBlazeOperand &Base = *(MBlazeOperand*)Operands[2];
+ MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3];
+
+ SMLoc S = Base.getStartLoc();
+ SMLoc O = Offset.getStartLoc();
+ SMLoc E = Offset.getEndLoc();
+
+ if (!Base.isReg()) {
+ Error(S, "base address must be a register");
+ return 0;
+ }
+
+ if (!Offset.isReg() && !Offset.isImm()) {
+ Error(O, "offset must be a register or immediate");
+ return 0;
+ }
+
+ MBlazeOperand *Op;
+ if (Offset.isReg())
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E);
+ else
+ Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E);
+
+ delete Operands.pop_back_val();
+ delete Operands.pop_back_val();
+ Operands.push_back(Op);
+
+ return Op;
+}
+
+bool MBlazeAsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
+ return (ParseRegister(RegNo) == 0);
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ RegNo = MatchRegisterName(getLexer().getTok().getIdentifier());
+ if (RegNo == 0)
+ return 0;
+
+ getLexer().Lex();
+ return MBlazeOperand::CreateReg(RegNo, S, E);
+ }
+}
+
+static unsigned MatchFslRegister(StringRef String) {
+ if (!String.startswith("rfsl"))
+ return -1;
+
+ unsigned regNum;
+ if (String.substr(4).getAsInteger(10,regNum))
+ return -1;
+
+ return regNum;
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseFsl() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::Identifier:
+ unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+ if (reg >= 16)
+ return 0;
+
+ getLexer().Lex();
+ const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+ return MBlazeOperand::CreateFslImm(EVal,S,E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ const MCExpr *EVal;
+ switch (getLexer().getKind()) {
+ default: return 0;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ if (getParser().ParseExpression(EVal))
+ return 0;
+
+ return MBlazeOperand::CreateImm(EVal, S, E);
+ }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ MBlazeOperand *Op;
+
+ // Attempt to parse the next token as a register name
+ unsigned RegNo;
+ Op = ParseRegister(RegNo);
+
+ // Attempt to parse the next token as an FSL immediate
+ if (!Op)
+ Op = ParseFsl();
+
+ // Attempt to parse the next token as an immediate
+ if (!Op)
+ Op = ParseImmediate();
+
+ // If the token could not be parsed then fail
+ if (!Op) {
+ Error(Parser.getTok().getLoc(), "unknown operand");
+ return 0;
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+ return Op;
+}
+
+/// Parse an mblaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The first operand is the token for the instruction name
+ size_t dotLoc = Name.find('.');
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+ if (dotLoc < Name.size())
+ Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (!ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (!ParseOperand(Operands))
+ return true;
+ }
+
+ // If the instruction requires a memory operand then we need to
+ // replace the last two operands (base+offset) with a single
+ // memory operand.
+ if (Name.startswith("lw") || Name.startswith("sw") ||
+ Name.startswith("lh") || Name.startswith("sh") ||
+ Name.startswith("lb") || Name.startswith("sb"))
+ return (ParseMemory(Operands) == NULL);
+
+ return false;
+}
+
+/// ParseDirective parses the MBlaze-specific directives
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+ RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+ LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
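As a rough illustration of what ParseMemory above does for lw/sw-style mnemonics (the trailing base-register and register-or-immediate offset operands are replaced by one memory operand), here is a minimal sketch with simplified stand-in types; Op and foldMemory are inventions for this example and not the parser's real MBlazeOperand/MCParsedAsmOperand API.

#include <cstdio>
#include <vector>

// Simplified stand-in for a parsed operand.
struct Op {
  enum Kind { Reg, Imm, Mem } kind;
  int base;    // register number for Reg, base register for Mem
  int offReg;  // offset register for Mem (0 if immediate offset)
  int offImm;  // immediate value for Imm, immediate offset for Mem
};

// Collapse the last two operands (base register + reg/imm offset) into a
// single memory operand, mirroring the folding done for load/store mnemonics.
static bool foldMemory(std::vector<Op> &ops) {
  if (ops.size() < 2) return false;
  const Op &base = ops[ops.size() - 2];
  const Op &off  = ops[ops.size() - 1];
  if (base.kind != Op::Reg || (off.kind != Op::Reg && off.kind != Op::Imm))
    return false;
  Op mem = {Op::Mem, base.base, 0, 0};
  if (off.kind == Op::Reg) mem.offReg = off.base;
  else                     mem.offImm = off.offImm;
  ops.pop_back();
  ops.pop_back();
  ops.push_back(mem);
  return true;
}

int main() {
  // "lw r3, r4, 8": destination register, then base + immediate offset.
  std::vector<Op> ops = {{Op::Reg, 3, 0, 0}, {Op::Reg, 4, 0, 0}, {Op::Imm, 0, 0, 8}};
  std::printf("folded=%d operands=%zu\n", (int)foldMemory(ops), ops.size());
  return 0;
}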
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
new file mode 100644
index 0000000..611a0f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmParser
+
+# Hack: we need to include 'main' MBlaze target directory for private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
new file mode 100644
index 0000000..9376e68
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeDisassembler
+ MBlazeDisassembler.cpp
+ )
+
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+set_property(
+ SOURCE MBlazeDisassembler.cpp
+ PROPERTY COMPILE_FLAGS "/Od"
+ )
+endif()
+
+add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
new file mode 100644
index 0000000..3379ac2
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -0,0 +1,647 @@
+//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It contains code to translate
+// the data produced by the decoder into MCInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeDisassembler.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+
+// #include "MBlazeGenDecoderTables.inc"
+// #include "MBlazeGenRegisterNames.inc"
+#include "MBlazeGenInstrInfo.inc"
+#include "MBlazeGenEDInfo.inc"
+
+using namespace llvm;
+
+const unsigned UNSUPPORTED = -1;
+
+static unsigned mblazeBinary2Opcode[] = {
+ MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
+ MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
+ MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
+ MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F
+
+ MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13
+ UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17
+ MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B
+ UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F
+
+ MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23
+ MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27
+ MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B
+ MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F
+
+ MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33
+ MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37
+ MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B
+ MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F
+};
+
+static unsigned getRD(uint32_t insn) {
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
+}
+
+static unsigned getRA(uint32_t insn) {
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
+}
+
+static unsigned getRB(uint32_t insn) {
+ return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
+}
+
+static int64_t getRS(uint32_t insn) {
+ return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
+}
+
+static int64_t getIMM(uint32_t insn) {
+ int16_t val = (insn & 0xFFFF);
+ return val;
+}
+
+static int64_t getSHT(uint32_t insn) {
+ int16_t val = (insn & 0x1F);
+ return val;
+}
+
+static unsigned getFLAGS(int32_t insn) {
+ return (insn & 0x7FF);
+}
+
+static int64_t getFSL(uint32_t insn) {
+ int16_t val = (insn & 0xF);
+ return val;
+}
+
+static unsigned decodeMUL(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0: return MBlaze::MUL;
+ case 1: return MBlaze::MULH;
+ case 2: return MBlaze::MULHSU;
+ case 3: return MBlaze::MULHU;
+ }
+}
+
+static unsigned decodeSEXT(uint32_t insn) {
+ switch (insn&0x7FF) {
+ default: return UNSUPPORTED;
+ case 0x60: return MBlaze::SEXT8;
+ case 0x68: return MBlaze::WIC;
+ case 0x64: return MBlaze::WDC;
+ case 0x66: return MBlaze::WDCC;
+ case 0x74: return MBlaze::WDCF;
+ case 0x61: return MBlaze::SEXT16;
+ case 0x41: return MBlaze::SRL;
+ case 0x21: return MBlaze::SRC;
+ case 0x01: return MBlaze::SRA;
+ }
+}
+
+static unsigned decodeBEQ(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQ;
+ case 0x10: return MBlaze::BEQD;
+ case 0x05: return MBlaze::BGE;
+ case 0x15: return MBlaze::BGED;
+ case 0x04: return MBlaze::BGT;
+ case 0x14: return MBlaze::BGTD;
+ case 0x03: return MBlaze::BLE;
+ case 0x13: return MBlaze::BLED;
+ case 0x02: return MBlaze::BLT;
+ case 0x12: return MBlaze::BLTD;
+ case 0x01: return MBlaze::BNE;
+ case 0x11: return MBlaze::BNED;
+ }
+}
+
+static unsigned decodeBEQI(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BEQI;
+ case 0x10: return MBlaze::BEQID;
+ case 0x05: return MBlaze::BGEI;
+ case 0x15: return MBlaze::BGEID;
+ case 0x04: return MBlaze::BGTI;
+ case 0x14: return MBlaze::BGTID;
+ case 0x03: return MBlaze::BLEI;
+ case 0x13: return MBlaze::BLEID;
+ case 0x02: return MBlaze::BLTI;
+ case 0x12: return MBlaze::BLTID;
+ case 0x01: return MBlaze::BNEI;
+ case 0x11: return MBlaze::BNEID;
+ }
+}
+
+static unsigned decodeBR(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BR;
+ case 0x08: return MBlaze::BRA;
+ case 0x0C: return MBlaze::BRK;
+ case 0x10: return MBlaze::BRD;
+ case 0x14: return MBlaze::BRLD;
+ case 0x18: return MBlaze::BRAD;
+ case 0x1C: return MBlaze::BRALD;
+ }
+}
+
+static unsigned decodeBRI(uint32_t insn) {
+ switch ((insn>>16)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::BRI;
+ case 0x08: return MBlaze::BRAI;
+ case 0x0C: return MBlaze::BRKI;
+ case 0x10: return MBlaze::BRID;
+ case 0x14: return MBlaze::BRLID;
+ case 0x18: return MBlaze::BRAID;
+ case 0x1C: return MBlaze::BRALID;
+ }
+}
+
+static unsigned decodeBSRL(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLL;
+ case 0x1: return MBlaze::BSRA;
+ case 0x0: return MBlaze::BSRL;
+ }
+}
+
+static unsigned decodeBSRLI(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x2: return MBlaze::BSLLI;
+ case 0x1: return MBlaze::BSRAI;
+ case 0x0: return MBlaze::BSRLI;
+ }
+}
+
+static unsigned decodeRSUBK(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::RSUBK;
+ case 0x1: return MBlaze::CMP;
+ case 0x3: return MBlaze::CMPU;
+ }
+}
+
+static unsigned decodeFADD(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::FADD;
+ case 0x080: return MBlaze::FRSUB;
+ case 0x100: return MBlaze::FMUL;
+ case 0x180: return MBlaze::FDIV;
+ case 0x200: return MBlaze::FCMP_UN;
+ case 0x210: return MBlaze::FCMP_LT;
+ case 0x220: return MBlaze::FCMP_EQ;
+ case 0x230: return MBlaze::FCMP_LE;
+ case 0x240: return MBlaze::FCMP_GT;
+ case 0x250: return MBlaze::FCMP_NE;
+ case 0x260: return MBlaze::FCMP_GE;
+ case 0x280: return MBlaze::FLT;
+ case 0x300: return MBlaze::FINT;
+ case 0x380: return MBlaze::FSQRT;
+ }
+}
+
+static unsigned decodeGET(uint32_t insn) {
+ switch ((insn>>10)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GET;
+ case 0x01: return MBlaze::EGET;
+ case 0x02: return MBlaze::AGET;
+ case 0x03: return MBlaze::EAGET;
+ case 0x04: return MBlaze::TGET;
+ case 0x05: return MBlaze::TEGET;
+ case 0x06: return MBlaze::TAGET;
+ case 0x07: return MBlaze::TEAGET;
+ case 0x08: return MBlaze::CGET;
+ case 0x09: return MBlaze::ECGET;
+ case 0x0A: return MBlaze::CAGET;
+ case 0x0B: return MBlaze::ECAGET;
+ case 0x0C: return MBlaze::TCGET;
+ case 0x0D: return MBlaze::TECGET;
+ case 0x0E: return MBlaze::TCAGET;
+ case 0x0F: return MBlaze::TECAGET;
+ case 0x10: return MBlaze::NGET;
+ case 0x11: return MBlaze::NEGET;
+ case 0x12: return MBlaze::NAGET;
+ case 0x13: return MBlaze::NEAGET;
+ case 0x14: return MBlaze::TNGET;
+ case 0x15: return MBlaze::TNEGET;
+ case 0x16: return MBlaze::TNAGET;
+ case 0x17: return MBlaze::TNEAGET;
+ case 0x18: return MBlaze::NCGET;
+ case 0x19: return MBlaze::NECGET;
+ case 0x1A: return MBlaze::NCAGET;
+ case 0x1B: return MBlaze::NECAGET;
+ case 0x1C: return MBlaze::TNCGET;
+ case 0x1D: return MBlaze::TNECGET;
+ case 0x1E: return MBlaze::TNCAGET;
+ case 0x1F: return MBlaze::TNECAGET;
+ case 0x20: return MBlaze::PUT;
+ case 0x22: return MBlaze::APUT;
+ case 0x24: return MBlaze::TPUT;
+ case 0x26: return MBlaze::TAPUT;
+ case 0x28: return MBlaze::CPUT;
+ case 0x2A: return MBlaze::CAPUT;
+ case 0x2C: return MBlaze::TCPUT;
+ case 0x2E: return MBlaze::TCAPUT;
+ case 0x30: return MBlaze::NPUT;
+ case 0x32: return MBlaze::NAPUT;
+ case 0x34: return MBlaze::TNPUT;
+ case 0x36: return MBlaze::TNAPUT;
+ case 0x38: return MBlaze::NCPUT;
+ case 0x3A: return MBlaze::NCAPUT;
+ case 0x3C: return MBlaze::TNCPUT;
+ case 0x3E: return MBlaze::TNCAPUT;
+ }
+}
+
+static unsigned decodeGETD(uint32_t insn) {
+ switch ((insn>>5)&0x3F) {
+ default: return UNSUPPORTED;
+ case 0x00: return MBlaze::GETD;
+ case 0x01: return MBlaze::EGETD;
+ case 0x02: return MBlaze::AGETD;
+ case 0x03: return MBlaze::EAGETD;
+ case 0x04: return MBlaze::TGETD;
+ case 0x05: return MBlaze::TEGETD;
+ case 0x06: return MBlaze::TAGETD;
+ case 0x07: return MBlaze::TEAGETD;
+ case 0x08: return MBlaze::CGETD;
+ case 0x09: return MBlaze::ECGETD;
+ case 0x0A: return MBlaze::CAGETD;
+ case 0x0B: return MBlaze::ECAGETD;
+ case 0x0C: return MBlaze::TCGETD;
+ case 0x0D: return MBlaze::TECGETD;
+ case 0x0E: return MBlaze::TCAGETD;
+ case 0x0F: return MBlaze::TECAGETD;
+ case 0x10: return MBlaze::NGETD;
+ case 0x11: return MBlaze::NEGETD;
+ case 0x12: return MBlaze::NAGETD;
+ case 0x13: return MBlaze::NEAGETD;
+ case 0x14: return MBlaze::TNGETD;
+ case 0x15: return MBlaze::TNEGETD;
+ case 0x16: return MBlaze::TNAGETD;
+ case 0x17: return MBlaze::TNEAGETD;
+ case 0x18: return MBlaze::NCGETD;
+ case 0x19: return MBlaze::NECGETD;
+ case 0x1A: return MBlaze::NCAGETD;
+ case 0x1B: return MBlaze::NECAGETD;
+ case 0x1C: return MBlaze::TNCGETD;
+ case 0x1D: return MBlaze::TNECGETD;
+ case 0x1E: return MBlaze::TNCAGETD;
+ case 0x1F: return MBlaze::TNECAGETD;
+ case 0x20: return MBlaze::PUTD;
+ case 0x22: return MBlaze::APUTD;
+ case 0x24: return MBlaze::TPUTD;
+ case 0x26: return MBlaze::TAPUTD;
+ case 0x28: return MBlaze::CPUTD;
+ case 0x2A: return MBlaze::CAPUTD;
+ case 0x2C: return MBlaze::TCPUTD;
+ case 0x2E: return MBlaze::TCAPUTD;
+ case 0x30: return MBlaze::NPUTD;
+ case 0x32: return MBlaze::NAPUTD;
+ case 0x34: return MBlaze::TNPUTD;
+ case 0x36: return MBlaze::TNAPUTD;
+ case 0x38: return MBlaze::NCPUTD;
+ case 0x3A: return MBlaze::NCAPUTD;
+ case 0x3C: return MBlaze::TNCPUTD;
+ case 0x3E: return MBlaze::TNCAPUTD;
+ }
+}
+
+static unsigned decodeIDIV(uint32_t insn) {
+ switch (insn&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::IDIV;
+ case 0x2: return MBlaze::IDIVU;
+ }
+}
+
+static unsigned decodeLBU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LBU;
+ case 0x1: return MBlaze::LBUR;
+ }
+}
+
+static unsigned decodeLHU(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LHU;
+ case 0x1: return MBlaze::LHUR;
+ }
+}
+
+static unsigned decodeLW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::LW;
+ case 0x1: return MBlaze::LWR;
+ case 0x2: return MBlaze::LWX;
+ }
+}
+
+static unsigned decodeSB(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SB;
+ case 0x1: return MBlaze::SBR;
+ }
+}
+
+static unsigned decodeSH(uint32_t insn) {
+ switch ((insn>>9)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SH;
+ case 0x1: return MBlaze::SHR;
+ }
+}
+
+static unsigned decodeSW(uint32_t insn) {
+ switch ((insn>>9)&0x3) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::SW;
+ case 0x1: return MBlaze::SWR;
+ case 0x2: return MBlaze::SWX;
+ }
+}
+
+static unsigned decodeMFS(uint32_t insn) {
+ switch ((insn>>15)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0:
+ switch ((insn>>16)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MSRSET;
+ case 0x1: return MBlaze::MSRCLR;
+ }
+ case 0x1:
+ switch ((insn>>14)&0x1) {
+ default: return UNSUPPORTED;
+ case 0x0: return MBlaze::MFS;
+ case 0x1: return MBlaze::MTS;
+ }
+ }
+}
+
+static unsigned decodeOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::OR;
+ case 0x400: return MBlaze::PCMPBF;
+ }
+}
+
+static unsigned decodeXOR(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::XOR;
+ case 0x400: return MBlaze::PCMPEQ;
+ }
+}
+
+static unsigned decodeANDN(uint32_t insn) {
+ switch (getFLAGS(insn)) {
+ default: return UNSUPPORTED;
+ case 0x000: return MBlaze::ANDN;
+ case 0x400: return MBlaze::PCMPNE;
+ }
+}
+
+static unsigned decodeRTSD(uint32_t insn) {
+ switch ((insn>>21)&0x1F) {
+ default: return UNSUPPORTED;
+ case 0x10: return MBlaze::RTSD;
+ case 0x11: return MBlaze::RTID;
+ case 0x12: return MBlaze::RTBD;
+ case 0x14: return MBlaze::RTED;
+ }
+}
+
+static unsigned getOPCODE(uint32_t insn) {
+ unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ];
+ switch (opcode) {
+ case MBlaze::MUL: return decodeMUL(insn);
+ case MBlaze::SEXT8: return decodeSEXT(insn);
+ case MBlaze::BEQ: return decodeBEQ(insn);
+ case MBlaze::BEQI: return decodeBEQI(insn);
+ case MBlaze::BR: return decodeBR(insn);
+ case MBlaze::BRI: return decodeBRI(insn);
+ case MBlaze::BSRL: return decodeBSRL(insn);
+ case MBlaze::BSRLI: return decodeBSRLI(insn);
+ case MBlaze::RSUBK: return decodeRSUBK(insn);
+ case MBlaze::FADD: return decodeFADD(insn);
+ case MBlaze::GET: return decodeGET(insn);
+ case MBlaze::GETD: return decodeGETD(insn);
+ case MBlaze::IDIV: return decodeIDIV(insn);
+ case MBlaze::LBU: return decodeLBU(insn);
+ case MBlaze::LHU: return decodeLHU(insn);
+ case MBlaze::LW: return decodeLW(insn);
+ case MBlaze::SB: return decodeSB(insn);
+ case MBlaze::SH: return decodeSH(insn);
+ case MBlaze::SW: return decodeSW(insn);
+ case MBlaze::MFS: return decodeMFS(insn);
+ case MBlaze::OR: return decodeOR(insn);
+ case MBlaze::XOR: return decodeXOR(insn);
+ case MBlaze::ANDN: return decodeANDN(insn);
+ case MBlaze::RTSD: return decodeRTSD(insn);
+ default: return opcode;
+ }
+}
+
+EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+ return instInfoMBlaze;
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool MBlazeDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ // The machine instruction.
+ uint32_t insn;
+ uint8_t bytes[4];
+
+ // We always consume 4 bytes of data
+ size = 4;
+
+ // We want to read exactly 4 bytes of data.
+ if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1)
+ return false;
+
+ // Encoded as a big-endian 32-bit word in the stream.
+ insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0);
+
+ // Get the MCInst opcode from the binary instruction and make sure
+ // that it is a valid instruction.
+ unsigned opcode = getOPCODE(insn);
+ if (opcode == UNSUPPORTED)
+ return false;
+
+ instr.setOpcode(opcode);
+
+ uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
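+  // The instruction format recorded by tblgen in TSFlags determines which
+  // register and immediate fields are extracted as operands below.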
+ switch ((tsFlags & MBlazeII::FormMask)) {
+ default: llvm_unreachable("unknown instruction encoding");
+
+ case MBlazeII::FRRRR:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ break;
+
+ case MBlazeII::FRRR:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ break;
+
+ case MBlazeII::FRI:
+ switch (opcode) {
+ default: llvm_unreachable("unknown instruction encoding");
+ case MBlaze::MFS:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ break;
+ case MBlaze::MTS:
+ instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ break;
+ case MBlaze::MSRSET:
+ case MBlaze::MSRCLR:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
+ break;
+ }
+ break;
+
+ case MBlazeII::FRRI:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ switch (opcode) {
+ default:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+ case MBlaze::BSRLI:
+ case MBlaze::BSRAI:
+ case MBlaze::BSLLI:
+ instr.addOperand(MCOperand::CreateImm(insn&0x1F));
+ break;
+ }
+ break;
+
+ case MBlazeII::FCRR:
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ break;
+
+ case MBlazeII::FCRI:
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRCR:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ break;
+
+ case MBlazeII::FRCI:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FCCR:
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ break;
+
+ case MBlazeII::FCCI:
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ break;
+
+ case MBlazeII::FRRCI:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
+ break;
+
+ case MBlazeII::FRRC:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ break;
+
+ case MBlazeII::FRCX:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FRCS:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+ break;
+
+ case MBlazeII::FCRCS:
+ instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ break;
+
+ case MBlazeII::FCRCX:
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCX:
+ instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+ break;
+
+ case MBlazeII::FCR:
+ instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ break;
+
+ case MBlazeII::FRIR:
+ instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+ instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ break;
+ }
+
+ return true;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T) {
+ return new MBlazeDisassembler;
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+ createMBlazeDisassembler);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 0000000..d05eced
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,55 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It is the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+ /// Constructor - Initializes the disassembler.
+ ///
+ MBlazeDisassembler() :
+ MCDisassembler() {
+ }
+
+ ~MBlazeDisassembler() {
+ }
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+
+ /// getEDInfo - See MCDisassembler.
+ EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
new file mode 100644
index 0000000..0530b32
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDisassembler
+
+# Hack: we need to include 'main' MBlaze target directory to grab headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..242a573
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeInstPrinter.cpp
+ )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 0000000..a7fd287
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,69 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << getRegisterName(Op.getReg());
+ } else if (Op.isImm()) {
+ O << (int32_t)Op.getImm();
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << "rfsl" << MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (MO.isImm())
+ O << (uint32_t)MO.getImm();
+ else
+ printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, OpNo, O, NULL);
+ O << ", ";
+ printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 0000000..bebc6c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+ class MBlazeInstPrinter : public MCInstPrinter {
+ public:
+ MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
+ }
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+ static const char *getInstructionName(unsigned Opcode);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+ void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+ const char *Modifier = 0);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
new file mode 100644
index 0000000..9fb6e86
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.h b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
index f9d828b..00c73f0 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlaze.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.h
@@ -21,8 +21,16 @@ namespace llvm {
class MBlazeTargetMachine;
class FunctionPass;
class MachineCodeEmitter;
+ class MCCodeEmitter;
+ class TargetAsmBackend;
class formatted_raw_ostream;
+ MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+
+ TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlaze.td b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
index 3815b6d..1fa1e4d 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlaze.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlaze.td
@@ -32,35 +32,35 @@ def MBlazeInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true",
- "Implements 3-stage pipeline.">;
+ "Implements 3-stage pipeline">;
def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
- "Implements barrel shifter.">;
+ "Implements barrel shifter">;
def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
- "Implements hardware divider.">;
+ "Implements hardware divider">;
def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
- "Implements hardware multiplier.">;
+ "Implements hardware multiplier">;
def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true",
- "Implements FSL instructions.">;
+ "Implements FSL instructions">;
def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true",
- "Implements extended FSL instructions.">;
+ "Implements extended FSL instructions">;
def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true",
- "Implements MSR register set and clear.">;
+ "Implements MSR register set and clear">;
def FeatureException : SubtargetFeature<"exception", "HasException", "true",
- "Implements hardware exception support.">;
+ "Implements hardware exception support">;
def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
- "Implements pattern compare instruction.">;
+ "Implements pattern compare instruction">;
def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
- "Implements floating point unit.">;
+ "Implements floating point unit">;
def FeatureESR : SubtargetFeature<"esr", "HasESR", "true",
"Implements ESR and EAR registers">;
def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true",
- "Implements processor version register.">;
+ "Implements processor version register">;
def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
"Implements multiplier with 64-bit result">;
def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
- "Implements sqrt and floating point convert.">;
+ "Implements sqrt and floating point convert">;
def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true",
- "Implements memory management unit.">;
+ "Implements memory management unit">;
//===----------------------------------------------------------------------===//
// MBlaze processors supported.
@@ -69,13 +69,26 @@ def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MBlazeGenericItineraries, Features>;
-
def : Proc<"v400", []>;
def : Proc<"v500", []>;
def : Proc<"v600", []>;
def : Proc<"v700", []>;
def : Proc<"v710", []>;
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
def MBlaze : Target {
let InstructionSet = MBlazeInstrInfo;
+ let AssemblyWriters = [MBlazeAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
new file mode 100644
index 0000000..a4b21af
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -0,0 +1,163 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "MBlaze.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+ switch (Kind) {
+ default: assert(0 && "invalid fixup kind!");
+ case FK_Data_1: return 1;
+ case FK_PCRel_2:
+ case FK_Data_2: return 2;
+ case FK_PCRel_4:
+ case FK_Data_4: return 4;
+ case FK_Data_8: return 8;
+ }
+}
+
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ MBlazeELFObjectWriter(Triple::OSType OSType)
+ : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public TargetAsmBackend {
+public:
+ MBlazeAsmBackend(const Target &T)
+ : TargetAsmBackend() {
+ }
+
+ unsigned getNumFixupKinds() const {
+ return 2;
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const;
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+ unsigned getPointerSize() const {
+ return 4;
+ }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case MBlaze::ADDIK: return MBlaze::ADDIK32;
+ case MBlaze::ORI: return MBlaze::ORI32;
+ case MBlaze::BRLID: return MBlaze::BRLID32;
+ }
+}
+
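+// Note: the *32 opcodes are assumed here to be the IMM-prefixed forms that
+// carry a full 32-bit immediate; relaxation widens an instruction whose
+// immediate operand is still an unresolved expression and therefore may not
+// fit in a 16-bit field.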
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
+
+ bool hasExprOrImm = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+ hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+ return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ Res = Inst;
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ if ((Count % 4) != 0)
+ return false;
+
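+  // Pad with zero words; a zero word is assumed to decode as "add r0, r0, r0"
+  // on MBlaze, which has no architectural effect, so it serves as a NOP.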
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->Write32(0x00000000);
+
+ return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+ Triple::OSType OSType;
+ ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+ : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+ /*IsLittleEndian*/ false);
+ }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ unsigned Size = getFixupKindSize(Fixup.getKind());
+
+ assert(Fixup.getOffset() + Size <= DataSize &&
+ "Invalid fixup offset!");
+
+ char *data = Data + Fixup.getOffset();
+ switch (Size) {
+ default: llvm_unreachable("Cannot fixup unknown value.");
+ case 1: llvm_unreachable("Cannot fixup 1 byte value.");
+ case 8: llvm_unreachable("Cannot fixup 8 byte value.");
+
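+  // Note (assumption): a 4-byte fixup appears to span an IMM/instruction
+  // pair, so the upper halfword of the value lands in the low 16 bits of the
+  // first word and the lower halfword in the low 16 bits of the second.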
+ case 4:
+ *(data+7) = uint8_t(Value);
+ *(data+6) = uint8_t(Value >> 8);
+ *(data+3) = uint8_t(Value >> 16);
+ *(data+2) = uint8_t(Value >> 24);
+ break;
+
+ case 2:
+ *(data+3) = uint8_t(Value >> 0);
+ *(data+2) = uint8_t(Value >> 8);
+ }
+}
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
+ const std::string &TT) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ assert(0 && "Mac not supported on MBlaze");
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ assert(0 && "Windows not supported on MBlaze");
+ default:
+ return new ELFMBlazeAsmBackend(T, Triple(TT).getOS());
+ }
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index f4b30ad..0016df5 100644
--- a/contrib/llvm/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -19,6 +19,8 @@
#include "MBlazeInstrInfo.h"
#include "MBlazeTargetMachine.h"
#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -27,6 +29,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -57,6 +60,15 @@ namespace {
return "MBlaze Assembly Printer";
}
+ void printSavedRegsBitmask();
+ void emitFrameDirective();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
@@ -65,26 +77,12 @@ namespace {
void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
- void printSavedRegsBitmask(raw_ostream &OS);
- void emitFrameDirective();
-
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- static const char *getRegisterName(unsigned RegNo);
-
- virtual void EmitFunctionEntryLabel();
+ void EmitInstruction(const MachineInstr *MI);
};
} // end of anonymous namespace
-#include "MBlazeGenAsmWriter.inc"
+// #include "MBlazeGenAsmWriter.inc"
//===----------------------------------------------------------------------===//
//
@@ -117,10 +115,6 @@ namespace {
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Mask directives
-//===----------------------------------------------------------------------===//
-
// Print a 32 bit hex number with all numbers.
static void printHex32(unsigned int Value, raw_ostream &O) {
O << "0x";
@@ -128,12 +122,11 @@ static void printHex32(unsigned int Value, raw_ostream &O) {
O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
}
-
// Create a bitmask with all callee saved registers for CPU or Floating Point
// registers. For CPU registers consider RA, GP and FP for saving if necessary.
-void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const TargetRegisterInfo &RI = *TM.getRegisterInfo();
- const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
// CPU Saved Registers Bitmasks
unsigned int CPUBitmask = 0;
@@ -144,12 +137,12 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
- if (MBlaze::CPURegsRegisterClass->contains(Reg))
+ if (MBlaze::GPRRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}
// Return Address and Frame registers must also be set in CPUBitmask.
- if (RI.hasFP(*MF))
+ if (TFI->hasFP(*MF))
CPUBitmask |= (1 << MBlazeRegisterInfo::
getRegisterNumbering(RI.getFrameRegister(*MF)));
@@ -158,48 +151,51 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
getRegisterNumbering(RI.getRARegister()));
// Print CPUBitmask
- O << "\t.mask \t"; printHex32(CPUBitmask, O);
- O << ',' << MBlazeFI->getCPUTopSavedRegOff() << '\n';
+ OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask));
}
-//===----------------------------------------------------------------------===//
-// Frame and Set directives
-//===----------------------------------------------------------------------===//
-
/// Frame Directive
void MBlazeAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-
- unsigned stackReg = RI.getFrameRegister(*MF);
- unsigned returnReg = RI.getRARegister();
- unsigned stackSize = MF->getFrameInfo()->getStackSize();
-
+ if (!OutStreamer.hasRawTextSupport())
+ return;
- OutStreamer.EmitRawText("\t.frame\t" + Twine(getRegisterName(stackReg)) +
- "," + Twine(stackSize) + "," +
- Twine(getRegisterName(returnReg)));
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ unsigned stkReg = RI.getFrameRegister(*MF);
+ unsigned retReg = RI.getRARegister();
+ unsigned stkSze = MF->getFrameInfo()->getStackSize();
+
+ OutStreamer.EmitRawText("\t.frame\t" +
+ Twine(MBlazeInstPrinter::getRegisterName(stkReg)) +
+ "," + Twine(stkSze) + "," +
+ Twine(MBlazeInstPrinter::getRegisterName(retReg)));
}
void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(CurrentFnSym);
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ AsmPrinter::EmitFunctionEntryLabel();
}
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ if (!OutStreamer.hasRawTextSupport())
+ return;
+
emitFrameDirective();
-
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printSavedRegsBitmask(OS);
- OutStreamer.EmitRawText(OS.str());
+ printSavedRegsBitmask();
}
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+}
+
+//===----------------------------------------------------------------------===//
+void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) {
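+  // Lower the MachineInstr to an MCInst and let the MC streamer print or
+  // encode it; this replaces the old raw-text printInstruction() path.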
+ MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitInstruction(TmpInst);
}
// Print out an operand for an inline asm expression.
@@ -220,11 +216,11 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << MBlazeInstPrinter::getRegisterName(MO.getReg());
break;
case MachineOperand::MO_Immediate:
- O << (int)MO.getImm();
+ O << (int32_t)MO.getImm();
break;
case MachineOperand::MO_FPImmediate: {
@@ -248,7 +244,7 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MachineOperand::MO_JumpTableIndex:
O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
+ << '_' << MO.getIndex();
break;
case MachineOperand::MO_ConstantPoolIndex:
@@ -267,7 +263,7 @@ void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
if (MO.isImm())
- O << (unsigned int)MO.getImm();
+ O << (uint32_t)MO.getImm();
else
printOperand(MI, opNum, O);
}
@@ -284,12 +280,56 @@ void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
void MBlazeAsmPrinter::
printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier) {
- printOperand(MI, opNum+1, O);
- O << ", ";
printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; // Noop
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new MBlazeInstPrinter(MAI);
+ return 0;
}
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+ TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget,
+ createMBlazeMCInstPrinter);
+
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
index 8622e0d..4962573 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -1,16 +1,16 @@
//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for MBlaze architecture.
//===----------------------------------------------------------------------===//
/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
+class CCIfSubtarget<string F, CCAction A>:
CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
//===----------------------------------------------------------------------===//
@@ -19,8 +19,10 @@ class CCIfSubtarget<string F, CCAction A>:
def RetCC_MBlaze : CallingConv<[
// i32 are returned in registers R3, R4
- CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>>
+]>;
- // f32 are returned in registers F3, F4
- CCIfType<[f32], CCAssignToReg<[F3, F4]>>
+def CC_MBlaze : CallingConv<[
+ CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>,
+ CCIfType<[i32,f32], CCAssignToStack<4, 4>>
]>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index b551b79..4399ee2 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Simple pass to fills delay slots with NOPs.
+// A pass that attempts to fill delay slots with useful instructions. If no
+// instruction can be moved into a delay slot, a NOP is placed there instead.
//
//===----------------------------------------------------------------------===//
@@ -19,11 +20,23 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
+namespace llvm {
+cl::opt<bool> DisableDelaySlotFiller(
+ "disable-mblaze-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the MBlaze delay slot filter."),
+ cl::Hidden);
+}
+
namespace {
struct Filler : public MachineFunctionPass {
@@ -31,7 +44,7 @@ namespace {
const TargetInstrInfo *TII;
static char ID;
- Filler(TargetMachine &tm)
+ Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
@@ -51,6 +64,168 @@ namespace {
char Filler::ID = 0;
} // end of anonymous namespace
+static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
+  // Any instruction with an immediate operand that cannot be represented
+  // in 16 bits requires an implicit IMM instruction.
+ unsigned numOper = candidate->getNumOperands();
+ for (unsigned op = 0; op < numOper; ++op) {
+ MachineOperand &mop = candidate->getOperand(op);
+
+    // The operand requires more than 16 bits to represent.
+ if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+ return true;
+
+ // We must assume that unknown immediate values require more than
+    // 16 bits to represent.
+ if (mop.isGlobal() || mop.isSymbol())
+ return true;
+
+ // FIXME: we could probably check to see if the FP value happens
+ // to not need an IMM instruction. For now we just always
+ // assume that FP values do.
+ if (mop.isFPImm())
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+ switch (instr->getOpcode()) {
+ default: return instr->getNumOperands();
+
+ // These instructions have a variable number of operands but the first two
+ // are the "real" operands that we care about during hazard detection.
+ case MBlaze::BRLID:
+ case MBlaze::BRALID:
+ case MBlaze::BRLD:
+ case MBlaze::BRALD:
+ return 2;
+ }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+ MachineBasicBlock::iterator &slot) {
+ // Hazard check
+ MachineBasicBlock::iterator a = candidate;
+ MachineBasicBlock::iterator b = slot;
+ TargetInstrDesc desc = candidate->getDesc();
+
+  // MBB layout:
+ // candidate := a0 = operation(a1, a2)
+ // ...middle bit...
+ // slot := b0 = operation(b1, b2)
+
+  // Possible hazards:
+ // 1. a1 or a2 was written during the middle bit
+ // 2. a0 was read or written during the middle bit
+ // 3. a0 is one or more of {b0, b1, b2}
+ // 4. b0 is one or more of {a1, a2}
+ // 5. a accesses memory, and the middle bit
+ // contains a store operation.
+ bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+ // Determine the number of operands in the slot instruction and in the
+ // candidate instruction.
+ const unsigned aend = getLastRealOperand(a);
+ const unsigned bend = getLastRealOperand(b);
+
+ // Check hazards type 1, 2 and 5 by scanning the middle bit
+ MachineBasicBlock::iterator m = a;
+ for (++m; m != b; ++m) {
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ bool aop_is_reg = a->getOperand(aop).isReg();
+ if (!aop_is_reg) continue;
+
+ bool aop_is_def = a->getOperand(aop).isDef();
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ const unsigned mend = getLastRealOperand(m);
+ for (unsigned mop = 0; mop<mend; ++mop) {
+ bool mop_is_reg = m->getOperand(mop).isReg();
+ if (!mop_is_reg) continue;
+
+ bool mop_is_def = m->getOperand(mop).isDef();
+ unsigned mop_reg = m->getOperand(mop).getReg();
+
+ if (aop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 2, because aop = a0
+ else if (mop_is_def && (mop_reg == aop_reg))
+ return true; // Hazard type 1, because aop in {a1, a2}
+ }
+ }
+
+ // Check hazard type 5
+ if (a_is_memory && m->getDesc().mayStore())
+ return true;
+ }
+
+ // Check hazard type 3 & 4
+ for (unsigned aop = 0; aop<aend; ++aop) {
+ if (a->getOperand(aop).isReg()) {
+ unsigned aop_reg = a->getOperand(aop).getReg();
+
+ for (unsigned bop = 0; bop<bend; ++bop) {
+ if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) {
+ unsigned bop_reg = b->getOperand(bop).getReg();
+ if (aop_reg == bop_reg)
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate) {
+ if (candidate == MBB.begin())
+ return false;
+
+ TargetInstrDesc brdesc = (--candidate)->getDesc();
+ return (brdesc.hasDelaySlot());
+}
+
+static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
+ if (!I->hasUnmodeledSideEffects())
+ return false;
+
+ unsigned op = I->getOpcode();
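+  // These carry-variant add/subtract forms are assumed to be flagged with
+  // unmodeled side effects only because of the carry bit, so they are still
+  // treated as safe candidates for the delay slot.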
+ if (op == MBlaze::ADDK || op == MBlaze::ADDIK ||
+ op == MBlaze::ADDC || op == MBlaze::ADDIC ||
+ op == MBlaze::ADDKC || op == MBlaze::ADDIKC ||
+ op == MBlaze::RSUBK || op == MBlaze::RSUBIK ||
+ op == MBlaze::RSUBC || op == MBlaze::RSUBIC ||
+ op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC)
+ return false;
+
+ return true;
+}
+
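+// findDelayInstr - Scan backwards from the delay slot looking for an
+// instruction that can be moved into it. Give up at anything that affects
+// control flow or has unknown side effects, and skip candidates that need an
+// implicit IMM or would create a hazard with the delayed instruction.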
+static MachineBasicBlock::iterator
+findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
+ MachineBasicBlock::iterator I = slot;
+ while (true) {
+ if (I == MBB.begin())
+ break;
+
+ --I;
+ TargetInstrDesc desc = I->getDesc();
+ if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
+ desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ hasUnknownSideEffects(I))
+ break;
+
+ if (hasImmInstruction(I) || delayHasHazard(I,slot))
+ continue;
+
+ return I;
+ }
+
+ return MBB.end();
+}
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// Currently, we fill delay slots with NOPs. We assume there is only one
/// delay slot per delayed instruction.
@@ -58,11 +233,19 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- ++J;
- BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB,I);
+
++FilledSlots;
Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
}
return Changed;
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
new file mode 100644
index 0000000..3f26ed1
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeELFWriterInfo.h"
+#include "MBlazeRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Implementation of the MBlazeELFWriterInfo class
+//===----------------------------------------------------------------------===//
+
+MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64,
+ TM.getTargetData()->isLittleEndian()) {
+}
+
+MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {}
+
+unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
+ switch (MachineRelTy) {
+ case MBlaze::reloc_pcrel_word:
+ return ELF::R_MICROBLAZE_64_PCREL;
+ case MBlaze::reloc_absolute_word:
+ return ELF::R_MICROBLAZE_NONE;
+ default:
+ llvm_unreachable("unknown mblaze machine relocation type");
+ }
+ return 0;
+}
+
+long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier) const {
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ return Modifier - 4;
+ case ELF::R_MICROBLAZE_32:
+ return Modifier;
+ default:
+ llvm_unreachable("unknown mblaze relocation type");
+ }
+ return 0;
+}
+
+unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
+ // FIXME: Most of these sizes are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32:
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_32_LO:
+ case ELF::R_MICROBLAZE_SRO32:
+ case ELF::R_MICROBLAZE_SRW32:
+ case ELF::R_MICROBLAZE_32_SYM_OP_SYM:
+ case ELF::R_MICROBLAZE_GOTOFF_32:
+ return 32;
+
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_64:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ case ELF::R_MICROBLAZE_GOT_64:
+ case ELF::R_MICROBLAZE_PLT_64:
+ case ELF::R_MICROBLAZE_GOTOFF_64:
+ return 64;
+ }
+
+ return 0;
+}
+
+bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
+ // FIXME: Most of these are guesses based on the name
+ switch (RelTy) {
+ case ELF::R_MICROBLAZE_32_PCREL:
+ case ELF::R_MICROBLAZE_64_PCREL:
+ case ELF::R_MICROBLAZE_32_PCREL_LO:
+ case ELF::R_MICROBLAZE_GOTPC_64:
+ return true;
+ }
+
+ return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+ return MBlaze::reloc_absolute_word;
+}
+
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+ unsigned RelOffset,
+ unsigned RelTy) const {
+  if (RelTy == ELF::R_MICROBLAZE_32_PCREL || RelTy == ELF::R_MICROBLAZE_64_PCREL)
+    return SymOffset - (RelOffset + 4);
+  else
+    assert(0 && "computeRelocation unknown for this relocation type");
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 0000000..63bfc0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ MBlazeELFWriterInfo(TargetMachine &TM);
+ virtual ~MBlazeELFWriterInfo();
+
+ /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type
+ virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+ /// hasRelocationAddend - True if the target uses an addend in the
+ /// ELF relocation entry.
+ virtual bool hasRelocationAddend() const { return false; }
+
+ /// getDefaultAddendForRelTy - Gets the default addend value for a
+ /// relocation entry based on the target ELF relocation type.
+ virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+ long int Modifier = 0) const;
+
+ /// getRelTySize - Returns the size of relocatable field in bits
+ virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+ /// isPCRelativeRel - True if the relocation type is pc relative
+ virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+ /// getJumpTableRelocationTy - Returns the machine relocation type used
+ /// to reference a jumptable.
+ virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+ /// computeRelocation - Some relocatable fields could be relocated
+ /// directly, avoiding the relocation symbol emission, compute the
+ /// final relocation value for this symbol.
+ virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+ unsigned RelTy) const;
+ };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 0000000..e763902
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> DisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
+}
+
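+// replaceFrameIndexes - Rewrite each frame index listed in FR as a fixed
+// object at the recorded offset and update every machine operand that
+// referenced the old index.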
+static void replaceFrameIndexes(MachineFunction &MF,
+ SmallVector<std::pair<int,int64_t>, 16> &FR) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+ const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+ SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+ for (; FRI != FRE; ++FRI) {
+ MFI->RemoveStackObject(FRI->first);
+ int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+ MBlazeFI->recordReplacement(FRI->first, NFI);
+
+ for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+ MachineBasicBlock::iterator MBB = MB->begin();
+ const MachineBasicBlock::iterator MBE = MB->end();
+
+ for (; MBB != MBE; ++MBB) {
+ MachineInstr::mop_iterator MIB = MBB->operands_begin();
+ const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+ for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+ if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+ DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+ MII->setIndex(NFI);
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer in the first
+// instruction of a function prologue. Once decremented, all stack
+// references are done through a positive offset from the stack/frame
+// pointer, so the stack is considered to grow upwards.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+ if (DisableStackAdjust) return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+ MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+ const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+ SmallVector<MachineInstr*, 16> EraseInstr;
+ SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+ MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+ MachineBasicBlock::iterator MIB = MBB->begin();
+ MachineBasicBlock::iterator MIE = MBB->end();
+
+ int StackAdjust = 0;
+ int StackOffset = -28;
+
+  // In this loop we are searching for frame indexes that correspond to incoming
+ // arguments that are already in the stack. We look for instruction sequences
+ // like the following:
+ //
+ // LWI REG, FI1, 0
+ // ...
+ // SWI REG, FI2, 0
+ //
+ // As long as there are no defs of REG in the ... part, we can eliminate
+ // the SWI instruction because the value has already been stored to the
+ // stack by the caller. All we need to do is locate FI at the correct
+  // stack location according to the calling conventions.
+ //
+ // Additionally, if the SWI operation kills the def of REG then we don't
+ // need the LWI operation so we can erase it as well.
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+ unsigned FIReg = I->getOperand(0).getReg();
+ MachineBasicBlock::iterator SI = I;
+ for (SI++; SI != MIE; ++SI) {
+ if (!SI->getOperand(0).isReg() ||
+ !SI->getOperand(1).isFI() ||
+ SI->getOpcode() != MBlaze::SWI) continue;
+
+ int FI = SI->getOperand(1).getIndex();
+ if (SI->getOperand(0).getReg() != FIReg ||
+ MFI->isFixedObjectIndex(FI) ||
+ MFI->getObjectSize(FI) != 4) continue;
+
+ if (SI->getOperand(0).isDef()) break;
+
+ if (SI->getOperand(0).isKill()) {
+ DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+ << " removed\n");
+ EraseInstr.push_back(I);
+ }
+
+ EraseInstr.push_back(SI);
+ DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+ FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+ StackOffset -= 4;
+ StackAdjust += 4;
+ break;
+ }
+ }
+ }
+
+  // In this loop we are searching for frame indexes that correspond to
+ // incoming arguments that are in registers. We look for instruction
+ // sequences like the following:
+ //
+ // ... SWI REG, FI, 0
+ //
+ // As long as the ... part does not define REG and if REG is an incoming
+  // parameter register, then we know that, according to ABI conventions, the
+ // caller has allocated stack space for it already. Instead of allocating
+  // stack space on our frame, we record the correct location in the caller's
+ // frame.
+ for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+ for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+ if (I->definesRegister(LI->first))
+ break;
+
+ if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+ !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+ I->getOperand(1).getIndex() < 0) continue;
+
+ if (I->getOperand(0).getReg() == LI->first) {
+ int FI = I->getOperand(1).getIndex();
+ MBlazeFI->recordLiveIn(FI);
+
+ int FILoc = 0;
+ switch (LI->first) {
+ default: llvm_unreachable("invalid incoming parameter!");
+ case MBlaze::R5: FILoc = -4; break;
+ case MBlaze::R6: FILoc = -8; break;
+ case MBlaze::R7: FILoc = -12; break;
+ case MBlaze::R8: FILoc = -16; break;
+ case MBlaze::R9: FILoc = -20; break;
+ case MBlaze::R10: FILoc = -24; break;
+ }
+
+ StackAdjust += 4;
+ FrameRelocate.push_back(std::make_pair(FI,FILoc));
+ DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+ break;
+ }
+ }
+ }
+
+ // Go ahead and erase all of the instructions that we determined were
+ // no longer needed.
+ for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+ MBB->erase(EraseInstr[i]);
+
+ // Replace all of the frame indexes that we have relocated with new
+ // fixed object frame indexes.
+ replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+ // If this function is not using either the interrupt_handler
+ // calling convention or the save_volatiles calling convention
+ // then we don't need to do any additional frame layout.
+ if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+ CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ return;
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ // Determine if the calling convention is the interrupt_handler
+ // calling convention. Some pieces of the prologue and epilogue
+  // only need to be emitted if we are lowering an interrupt handler.
+ bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine where to put prologue and epilogue additions
+ MachineBasicBlock &MENT = MF.front();
+ MachineBasicBlock &MEXT = MF.back();
+
+ MachineBasicBlock::iterator MENTI = MENT.begin();
+ MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+ DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+ DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+ // Store the frame indexes generated during prologue additions for use
+ // when we are generating the epilogue additions.
+ SmallVector<int, 10> VFI;
+
+ // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+ // always have a SWI because it is used when processing RMSR.
+ for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+ if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+ int FI = MFI->CreateStackObject(4,4,false,false);
+ VFI.push_back(FI);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+ .addFrameIndex(FI).addImm(0);
+ }
+
+ // Build the prologue SWI for R17, R18
+ int R17FI = MFI->CreateStackObject(4,4,false,false);
+ int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+  // Build the prologue SWI and the epilogue LWI for RMSR if needed
+ if (isIntr) {
+ int MSRFI = MFI->CreateStackObject(4,4,false,false);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+ .addReg(MBlaze::RMSR);
+ BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+ .addFrameIndex(MSRFI).addImm(0);
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+ .addReg(MBlaze::R11);
+ }
+
+ // Build the epilogue LWI for R17, R18
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+ .addFrameIndex(R18FI).addImm(0);
+
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+ .addFrameIndex(R17FI).addImm(0);
+
+ // Build the epilogue LWI for R3 - R12 if needed
+ for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+ if (!MRI.isPhysRegUsed(r)) continue;
+ BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+ .addFrameIndex(VFI[--i]).addImm(0);
+ }
+}
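
The two loops above have to agree: the prologue pushes one frame index per saved register in ascending register order, and the epilogue walks the registers in descending order while popping indexes from the back of VFI. A rough standalone sketch of that mirrored indexing, with made-up register numbers and assuming every register in the range gets saved:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> VFI;                  // frame indexes in prologue order
      for (int r = 3; r <= 12; ++r)          // save R3..R12 (assume all are used)
        VFI.push_back(100 + r);              // hypothetical frame index per register
      for (int r = 12, i = (int)VFI.size(); r >= 3; --r)
        std::printf("reload r%d from FI %d\n", r, VFI[--i]);  // epilogue order
      return 0;
    }
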
+
+static void determineFrameLayout(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ // Replace the dummy '0' SPOffset with the real negative offsets, as
+ // explained in LowerFORMAL_ARGUMENTS. Leaving '0' there until now keeps
+ // calculateFrameObjectOffsets from laying these objects out with the rest
+ // of the stack frame.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+ DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+ // Get the stack alignment provided by the target. The maximum alignment
+ // of the fixed frame objects is not currently taken into account.
+ // unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+ MFI->setStackSize(FrameSize);
+ DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
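
determineFrameLayout rounds the frame size up with a mask derived from the target stack alignment. A minimal sketch of that arithmetic, assuming the 4-byte stack alignment the MBlazeFrameLowering constructor requests:

    #include <cassert>

    int main() {
      unsigned TargetAlign = 4;                          // assumed MBlaze stack alignment
      unsigned AlignMask = TargetAlign - 1;              // 0x3
      unsigned FrameSize = 10;                           // example unaligned frame size
      FrameSize = (FrameSize + AlignMask) & ~AlignMask;  // round up to a multiple of 4
      assert(FrameSize == 12);
      return 0;
    }
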
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+ const {
+ const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->hasReplacement(FI))
+ FI = MBlazeFI->getReplacement(FI);
+ return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
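
getFrameIndexOffset simply redirects a relocated frame index through the replacement table recorded by analyzeFrameIndexes before asking the base class for the offset. A rough sketch of that indirection using a plain std::map in place of MBlazeFunctionInfo's table (names here are illustrative only):

    #include <cassert>
    #include <map>

    static int resolveFI(const std::map<int, int> &Replacements, int FI) {
      std::map<int, int>::const_iterator It = Replacements.find(FI);
      return It == Replacements.end() ? FI : It->second;  // hasReplacement/getReplacement
    }

    int main() {
      std::map<int, int> Replacements;
      Replacements[3] = 7;                  // stack object 3 became fixed object 7
      assert(resolveFI(Replacements, 3) == 7);
      assert(resolveFI(Replacements, 1) == 1);
      return 0;
    }
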
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Determine the correct frame layout
+ determineFrameLayout(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+ // swi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ const MBlazeInstrInfo &TII =
+ *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addReg(MBlaze::R1).addImm(FPOffset);
+ }
+
+ // lwi R15, R1, stack_loc
+ if (MFI->adjustsStack() || requiresRA) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addReg(MBlaze::R1).addImm(RAOffset);
+ }
+
+ // Get the number of bytes from FrameInfo
+ int StackSize = (int) MFI->getStackSize();
+
+ // addi R1, R1, imm
+ if (StackSize) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(StackSize);
+ }
+}
+
+void MBlazeFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+ if (MFI->adjustsStack() || requiresRA) {
+ MBlazeFI->setRAStackOffset(0);
+ MFI->CreateFixedObject(4,0,true);
+ }
+
+ if (hasFP(MF)) {
+ MBlazeFI->setFPStackOffset(4);
+ MFI->CreateFixedObject(4,4,true);
+ }
+
+ interruptFrameLayout(MF);
+ analyzeFrameIndexes(MF);
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
new file mode 100644
index 0000000..8be15bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -0,0 +1,53 @@
+//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_FRAMEINFO_H
+#define MBLAZE_FRAMEINFO_H
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MBlazeSubtarget;
+
+class MBlazeFrameLowering : public TargetFrameLowering {
+protected:
+ const MBlazeSubtarget &STI;
+
+public:
+ explicit MBlazeFrameLowering(const MBlazeSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) {
+ }
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index e64dd0e..6b43497 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -81,13 +81,9 @@ private:
SDNode *getGlobalBaseReg();
SDNode *Select(SDNode *N);
- // Complex Pattern.
- bool SelectAddr(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &Offset);
-
// Address Selection
- bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index);
- bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base);
+ bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base);
// getI32Imm - Return a target constant with the specified value, of type i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -122,7 +118,7 @@ static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool MBlazeDAGToDAGISel::
-SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
+SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) {
if (N.getOpcode() == ISD::FrameIndex) return false;
if (N.getOpcode() == ISD::TargetExternalSymbol ||
N.getOpcode() == ISD::TargetGlobalAddress)
@@ -137,8 +133,8 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
N.getOperand(1).getOpcode() == ISD::TargetJumpTable)
return false; // jump tables.
- Base = N.getOperand(1);
- Index = N.getOperand(0);
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
return true;
}
@@ -149,9 +145,9 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
/// a signed 32-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool MBlazeDAGToDAGISel::
-SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
+SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) {
// If this can be more profitably realized as r+r, fail.
- if (SelectAddrRegReg(Op, N, Disp, Base))
+ if (SelectAddrRegReg(N, Base, Disp))
return false;
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
@@ -163,7 +159,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
} else {
Base = N.getOperand(0);
}
- DEBUG( errs() << "WESLEY: Using Operand Immediate\n" );
return true; // [r+i]
}
} else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
@@ -171,7 +166,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
uint32_t Imm = CN->getZExtValue();
Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
- DEBUG( errs() << "WESLEY: Using Constant Node\n" );
return true;
}
@@ -190,76 +184,21 @@ SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// ComplexPattern used on MBlazeInstrInfo
-/// Used on MBlaze Load/Store instructions
-bool MBlazeDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
-
- // on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
- (Addr.getOpcode() == ISD::TargetConstantPool) ||
- (Addr.getOpcode() == ISD::TargetJumpTable)){
- Base = CurDAG->getRegister(MBlaze::R15, MVT::i32);
- Offset = Addr;
- return true;
- }
- } else {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (isUInt<16>(CN->getZExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- } else {
- Base = Addr.getOperand(0);
- }
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
- return true;
- }
- }
- }
-
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
- // Dump information about the Node being selected
- DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
-
// If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ if (Node->isMachineOpcode())
return NULL;
- }
///
// Instruction Selection not handled by the auto-generated
// tablegen selection should be handled here.
///
- switch(Opcode) {
+ switch (Opcode) {
default: break;
// Get target GOT address.
@@ -271,7 +210,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
- unsigned Opc = MBlaze::ADDI;
+ unsigned Opc = MBlaze::ADDIK;
if (Node->hasOneUse())
return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
@@ -289,8 +228,8 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
SDValue InFlag(0, 0);
- if ( (isa<GlobalAddressSDNode>(Callee)) ||
- (isa<ExternalSymbolSDNode>(Callee)) )
+ if ((isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)))
{
/// Direct call for global addresses and external symbols
SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
@@ -309,7 +248,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
// Emit Jump and Link Register
SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
- MVT::Flag, R20Reg, Chain);
+ MVT::Glue, R20Reg, Chain);
Chain = SDValue(ResNode, 0);
InFlag = SDValue(ResNode, 1);
ReplaceUses(SDValue(Node, 0), Chain);
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 1730b68..2f40bfc 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -35,6 +35,11 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
@@ -56,9 +61,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setBooleanContents(ZeroOrOneBooleanContent);
// Set up the register classes
- addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+ addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass);
if (Subtarget->hasFPU()) {
- addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+ addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
}
@@ -86,6 +91,10 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Sign extended loads must be expanded
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
// MBlaze has no REM or DIVREM operations.
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -112,8 +121,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
}
// Expand unsupported conversions
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
// Expand SELECT_CC
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
@@ -166,7 +175,6 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// Use the default for now
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// MBlaze doesn't have extending float->double load/store
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -204,172 +212,353 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op,
//===----------------------------------------------------------------------===//
MachineBasicBlock*
MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
-
+ MachineBasicBlock *MBB)
+ const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+
case MBlaze::ShiftRL:
case MBlaze::ShiftRA:
- case MBlaze::ShiftL: {
- // To "insert" a shift left instruction, we actually have to insert a
- // simple loop. The incoming instruction knows the destination vreg to
- // set, the source vreg to operate over and the shift amount.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // start:
- // andi samt, samt, 31
- // beqid samt, finish
- // add dst, src, r0
- // loop:
- // addik samt, samt, -1
- // sra dst, dst
- // bneid samt, loop
- // nop
- // finish:
- MachineFunction *F = BB->getParent();
- MachineRegisterInfo &R = F->getRegInfo();
- MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, loop);
- F->insert(It, finish);
-
- // Update machine-CFG edges by transfering adding all successors and
- // remaining instructions from the current block to the new block which
- // will contain the Phi node for the select.
- finish->splice(finish->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- finish->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(loop);
- BB->addSuccessor(finish);
-
- // Next, add the finish block as a successor of the loop block
- loop->addSuccessor(finish);
- loop->addSuccessor(loop);
-
- unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
- .addReg(MI->getOperand(2).getReg())
- .addImm(31);
-
- unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
- .addReg(MI->getOperand(1).getReg())
- .addImm(0);
-
- BuildMI(BB, dl, TII->get(MBlaze::BEQID))
- .addReg(IAMT)
- .addMBB(finish);
-
- unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
- .addReg(IVAL).addMBB(BB)
- .addReg(NDST).addMBB(loop);
-
- unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
- .addReg(IAMT).addMBB(BB)
- .addReg(NAMT).addMBB(loop);
-
- if (MI->getOpcode() == MBlaze::ShiftL)
- BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
- else if (MI->getOpcode() == MBlaze::ShiftRA)
- BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
- else if (MI->getOpcode() == MBlaze::ShiftRL)
- BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
- else
- llvm_unreachable( "Cannot lower unknown shift instruction" );
-
- BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT)
- .addReg(SAMT)
- .addImm(-1);
-
- BuildMI(loop, dl, TII->get(MBlaze::BNEID))
- .addReg(NAMT)
- .addMBB(loop);
-
- BuildMI(*finish, finish->begin(), dl,
- TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
- .addReg(IVAL).addMBB(BB)
- .addReg(NDST).addMBB(loop);
-
- // The pseudo instruction is no longer needed so remove it
+ case MBlaze::ShiftL:
+ return EmitCustomShift(MI, MBB);
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC:
+ return EmitCustomSelect(MI, MBB);
+
+ case MBlaze::CAS32:
+ case MBlaze::SWP32:
+ case MBlaze::LAA32:
+ case MBlaze::LAS32:
+ case MBlaze::LAD32:
+ case MBlaze::LAO32:
+ case MBlaze::LAX32:
+ case MBlaze::LAN32:
+ return EmitCustomAtomic(MI, MBB);
+
+ case MBlaze::MEMBARRIER:
+ // The Microblaze does not need memory barriers. Just delete the pseudo
+ // instruction and finish.
MI->eraseFromParent();
- return finish;
+ return MBB;
+ }
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by transferring all successors and remaining
+ // instructions from the current block to the new finish block, which will
+ // contain the PHI node for the shift result.
+ finish->splice(finish->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the true and fallthrough blocks as its successors.
+ MBB->addSuccessor(loop);
+ MBB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(MBB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable("Cannot lower unknown shift instruction");
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(MBB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return finish;
+}
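
The machine code EmitCustomShift builds is just a counted loop over single-bit shifts. A rough C++ equivalent of the ShiftL case, shown only to make the control flow explicit (the ShiftRA/ShiftRL cases substitute a right shift for the add):

    #include <cstdint>
    #include <cstdio>

    static uint32_t shiftLeftLoop(uint32_t src, uint32_t amt) {
      amt &= 31;                 // andi samt, samt, 31
      uint32_t dst = src;        // addik dst, src, 0
      while (amt != 0) {         // beqid skips the loop entirely when amt is zero
        dst = dst + dst;         // ShiftL steps by one bit: add dst, dst, dst
        --amt;                   // addik samt, samt, -1; bneid branches back
      }
      return dst;
    }

    int main() {
      std::printf("%u\n", shiftLeftLoop(3, 4));   // prints 48, i.e. 3 << 4
      return 0;
    }
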
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable("Unknown branch condition");
+ case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+ }
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Transfer the remainder of MBB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ MBB->addSuccessor(flsBB);
+ MBB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+ BuildMI(MBB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+ // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return dneBB;
+}
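
EmitCustomSelect emits the standard two-way diamond: branch on the already-computed condition register, fall through to the false block, and merge the two values with a PHI in the join block. A rough scalar equivalent, with plain ints standing in for the pseudo instruction's operands:

    #include <cstdio>

    static int selectCC(int cond, int trueVal, int falseVal) {
      int result;
      if (cond)              // BxxID cond, dneBB: jump straight to the join block
        result = trueVal;    // value reaching dneBB from the entry block
      else
        result = falseVal;   // value reaching dneBB through flsBB, the fallthrough
      return result;         // the PHI in dneBB picks whichever path was taken
    }

    int main() {
      std::printf("%d %d\n", selectCC(1, 10, 20), selectCC(0, 10, 20));  // 10 20
      return 0;
    }
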
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // All atomic instructions on the Microblaze are implemented using the
+ // load-linked / store-conditional style atomic instruction sequences.
+ // Thus, all operations will look something like the following:
+ //
+ // start:
+ // lwx RV, RP, 0
+ // <do stuff>
+ // swx RV, RP, 0
+ // addic RC, R0, 0
+ // bneid RC, start
+ //
+ // exit:
+ //
+ // To lower one of these pseudo instructions we materialize that retry loop
+ // out of new basic blocks, much like the custom shift lowering above.
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator It = MBB;
+ ++It;
+
+ MachineFunction *F = MBB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+
+ // Create the start and exit basic blocks for the atomic operation
+ MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, start);
+ F->insert(It, exit);
+
+ // Update machine-CFG edges by transferring all successors and remaining
+ // instructions from the current block to the new exit block.
+ exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Add the fallthrough block as its successor.
+ MBB->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ MachineBasicBlock *final = start;
+ unsigned finalReg = 0;
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic instruction!");
+
+ case MBlaze::SWP32:
+ finalReg = MI->getOperand(2).getReg();
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+ break;
+
+ case MBlaze::LAN32:
+ case MBlaze::LAX32:
+ case MBlaze::LAO32:
+ case MBlaze::LAD32:
+ case MBlaze::LAS32:
+ case MBlaze::LAA32: {
+ unsigned opcode = 0;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Cannot lower unknown atomic load!");
+ case MBlaze::LAA32: opcode = MBlaze::ADDIK; break;
+ case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break;
+ case MBlaze::LAD32: opcode = MBlaze::AND; break;
+ case MBlaze::LAO32: opcode = MBlaze::OR; break;
+ case MBlaze::LAX32: opcode = MBlaze::XOR; break;
+ case MBlaze::LAN32: opcode = MBlaze::AND; break;
}
- case MBlaze::Select_FCC:
- case MBlaze::Select_CC: {
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
-
- unsigned Opc;
- switch (MI->getOperand(4).getImm()) {
- default: llvm_unreachable( "Unknown branch condition" );
- case MBlazeCC::EQ: Opc = MBlaze::BNEID; break;
- case MBlazeCC::NE: Opc = MBlaze::BEQID; break;
- case MBlazeCC::GT: Opc = MBlaze::BLEID; break;
- case MBlazeCC::LT: Opc = MBlaze::BGEID; break;
- case MBlazeCC::GE: Opc = MBlaze::BLTID; break;
- case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ start->addSuccessor(exit);
+ start->addSuccessor(start);
+
+ BuildMI(start, dl, TII->get(opcode), finalReg)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
+
+ if (MI->getOpcode() == MBlaze::LAN32) {
+ unsigned tmp = finalReg;
+ finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg)
+ .addReg(tmp)
+ .addImm(-1);
}
+ break;
+ }
+
+ case MBlaze::CAS32: {
+ finalReg = MI->getOperand(3).getReg();
+ final = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, final);
+ start->addSuccessor(exit);
+ start->addSuccessor(final);
+ final->addSuccessor(exit);
+ final->addSuccessor(start);
+
+ unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(start, dl, TII->get(MBlaze::CMP), CMP)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg());
- F->insert(It, flsBB);
- F->insert(It, dneBB);
-
- // Transfer the remainder of BB and its successor edges to dneBB.
- dneBB->splice(dneBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- dneBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BB->addSuccessor(flsBB);
- BB->addSuccessor(dneBB);
- flsBB->addSuccessor(dneBB);
-
- BuildMI(BB, dl, TII->get(Opc))
- .addReg(MI->getOperand(3).getReg())
- .addMBB(dneBB);
-
- // sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
- // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
- // .addReg(MI->getOperand(2).getReg()).addMBB(BB);
-
- BuildMI(*dneBB, dneBB->begin(), dl,
- TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(BB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return dneBB;
+ BuildMI(start, dl, TII->get(MBlaze::BNEID))
+ .addReg(CMP)
+ .addMBB(exit);
+
+ final->moveAfter(start);
+ exit->moveAfter(final);
+ break;
}
}
+
+ unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+ BuildMI(final, dl, TII->get(MBlaze::SWX))
+ .addReg(finalReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MBlaze::R0);
+
+ BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK)
+ .addReg(MBlaze::R0)
+ .addImm(0);
+
+ BuildMI(final, dl, TII->get(MBlaze::BNEID))
+ .addReg(CHK)
+ .addMBB(start);
+
+ // The pseudo instruction is no longer needed so remove it
+ MI->eraseFromParent();
+ return exit;
}
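
Every case above ends in the same retry shape: LWX loads the current value, the chosen operation produces the value to store, SWX attempts the reserved store, and ADDIC/BNEID branch back to start when the reservation was lost. A rough C++ sketch of that shape using std::atomic's compare-exchange as a stand-in for the LWX/SWX reservation pair (an analogy only, not the MBlaze instructions):

    #include <atomic>
    #include <cstdio>

    static int atomicAddSketch(std::atomic<int> &mem, int val) {
      int old = mem.load();                               // lwx  RV, RP, 0
      while (!mem.compare_exchange_weak(old, old + val))  // swx; addic; bneid start
        ;                                                 // lost the reservation, retry
      return old;                                         // result register holds the loaded value
    }

    int main() {
      std::atomic<int> x(5);
      int old = atomicAddSketch(x, 3);
      std::printf("old=%d new=%d\n", old, x.load());      // old=5 new=8
      return 0;
    }
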
//===----------------------------------------------------------------------===//
@@ -392,9 +581,9 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op,
CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
.getValue(1);
} else {
- llvm_unreachable( "Cannot lower select_cc with unknown type" );
+ llvm_unreachable("Cannot lower select_cc with unknown type");
}
-
+
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
CompareFlag);
}
@@ -421,15 +610,12 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue HiPart;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0);
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
- //return JTI;
}
SDValue MBlazeTargetLowering::
@@ -440,7 +626,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MBlazeII::MO_ABS_HILO);
+ N->getOffset(), 0);
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
}
@@ -456,7 +642,8 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
false, false, 0);
}
@@ -466,52 +653,24 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op,
#include "MBlazeGenCallingConv.inc"
-static bool CC_MBlaze2(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const unsigned RegsSize=6;
- static const unsigned IntRegs[] = {
+static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned ArgRegs[] = {
MBlaze::R5, MBlaze::R6, MBlaze::R7,
MBlaze::R8, MBlaze::R9, MBlaze::R10
};
- static const unsigned FltRegs[] = {
- MBlaze::F5, MBlaze::F6, MBlaze::F7,
- MBlaze::F8, MBlaze::F9, MBlaze::F10
- };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs);
+ if (!Reg) return false;
- unsigned Reg=0;
-
- // Promote i8 and i16
- if (LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- }
-
- if (ValVT == MVT::i32) {
- Reg = State.AllocateReg(IntRegs, RegsSize);
- LocVT = MVT::i32;
- } else if (ValVT == MVT::f32) {
- Reg = State.AllocateReg(FltRegs, RegsSize);
- LocVT = MVT::f32;
- }
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- if (!Reg) {
- unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
- unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- } else {
- unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
- State.AllocateStack(SizeInBytes, SizeInBytes);
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- }
-
- return false; // CC must always match
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -532,31 +691,35 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// MBlaze does not yet support tail call optimization
isTailCall = false;
+ // The MBlaze requires stack slots for arguments passed to var arg
+ // functions even if they are passed in registers.
+ bool needsRegArgSlots = isVarArg;
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
*DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2);
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Variable argument function calls require a minimum of 24 bytes of stack
+ if (isVarArg && NumBytes < 24) NumBytes = 24;
+
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- // First/LastArgStackLoc contains the first/last
- // "at stack" argument location.
- int LastArgStackLoc = 0;
- unsigned FirstStackArgLoc = 0;
-
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
+ MVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
// Promote the value if needed.
@@ -582,20 +745,31 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Register can't get to this point...
assert(VA.isMemLoc());
+ // Since we are already passing values on the stack we don't
+ // need to worry about creating additional slots for the
+ // values passed via registers.
+ needsRegArgSlots = false;
+
// Create the frame index object for this incoming parameter
- LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
- int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc, true);
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
+ int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
// emit ISD::STORE which stores the
// parameter value to a stack location
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
}
}
+ // If we need to reserve stack space for the arguments passed via registers
+ // then create a fixed stack object at the beginning of the stack.
+ if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+ MFI->CreateFixedObject(28,0,true);
+
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -616,19 +790,18 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0, OpFlag);
+ getPointerTy(), 0, 0);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
- getPointerTy(), OpFlag);
+ getPointerTy(), 0);
// MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
// = Chain, Callee, Reg#1, Reg#2, ...
//
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
@@ -678,7 +851,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
- }
+ }
return Chain;
}
@@ -713,30 +886,28 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
SDValue StackPtr;
- unsigned FirstStackArgLoc = 0;
-
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Arguments stored on registers
if (VA.isRegLoc()) {
- EVT RegVT = VA.getLocVT();
+ MVT RegVT = VA.getLocVT();
ArgRegEnd = VA.getLocReg();
TargetRegisterClass *RC = 0;
if (RegVT == MVT::i32)
- RC = MBlaze::CPURegsRegisterClass;
+ RC = MBlaze::GPRRegisterClass;
else if (RegVT == MVT::f32)
- RC = MBlaze::FGR32RegisterClass;
+ RC = MBlaze::GPRRegisterClass;
else
llvm_unreachable("RegVT not supported by LowerFormalArguments");
// Transform the arguments stored on
// physical registers into virtual ones
- unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
@@ -756,9 +927,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
}
InVals.push_back(ArgValue);
-
} else { // VA.isRegLoc()
-
// sanity check
assert(VA.isMemLoc());
@@ -774,41 +943,44 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned StackLoc = VA.getLocMemOffset() + 4;
int FI = MFI->CreateFixedObject(ArgSize, 0, true);
- MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
- (FirstStackArgLoc + VA.getLocMemOffset())));
+ MBlazeFI->recordLoadArgsFI(FI, -StackLoc);
+ MBlazeFI->recordLiveIn(FI);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
// To meet ABI, when VARARGS are passed on registers, the registers
// must have their values written to the caller stack frame. If the last
- // argument was placed in the stack, there's no need to save any register.
+ // argument was placed in the stack, there's no need to save any register.
if ((isVarArg) && ArgRegEnd) {
if (StackPtr.getNode() == 0)
StackPtr = DAG.getRegister(StackReg, getPointerTy());
// The last register argument that must be saved is MBlaze::R10
- TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass;
+ TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
- unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin);
+ unsigned StackLoc = Start - Begin + 1;
for (; Start <= End; ++Start, ++StackLoc) {
unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
- unsigned LiveReg = MF.addLiveIn(Reg, RC);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
int FI = MFI->CreateFixedObject(4, 0, true);
- MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
+ MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
- OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
// Record the frame index of the first variable argument
@@ -818,7 +990,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
}
}
- // All stores are grouped in one node to allow the matching between
+ // All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions
if (!OutChains.empty()) {
OutChains.push_back(Chain);
@@ -872,13 +1044,18 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
Flag = Chain.getValue(1);
}
- // Return on MBlaze is always a "rtsd R15, 8"
+ // If this function is using the interrupt_handler calling convention
+ // then use "rtid r14, 0" otherwise use "rtsd r15, 8"
+ unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
+ SDValue DReg = DAG.getRegister(Reg, MVT::i32);
+
if (Flag.getNode())
- return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
- Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag);
- else // Return Void
- return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
- Chain, DAG.getRegister(MBlaze::R15, MVT::i32));
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag);
+
+ return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg);
}
//===----------------------------------------------------------------------===//
@@ -909,6 +1086,37 @@ getConstraintType(const std::string &Constraint) const
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MBlazeTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
/// return a list of registers that can be used to satisfy the constraint.
/// This should only be used for C_RegisterClass constraints.
@@ -917,10 +1125,10 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -940,6 +1148,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
// GCC MBlaze Constraint Letters
case 'd':
case 'y':
+ case 'f':
return make_vector<unsigned>(
MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6,
MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11,
@@ -947,15 +1156,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
MBlaze::R30, MBlaze::R31, 0);
-
- case 'f':
- return make_vector<unsigned>(
- MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6,
- MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11,
- MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
- MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
- MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
- MBlaze::F30, MBlaze::F31, 0);
}
return std::vector<unsigned>();
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
index 5ec2563..91649bc 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,6 +15,7 @@
#ifndef MBlazeISELLOWERING_H
#define MBlazeISELLOWERING_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "MBlaze.h"
@@ -31,6 +32,30 @@ namespace llvm {
GE,
LE
};
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GT;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
}
namespace MBlazeISD {
@@ -53,8 +78,11 @@ namespace llvm {
// Integer Compare
ICmp,
- // Return
- Ret
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
};
}
@@ -121,6 +149,15 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
@@ -128,6 +165,11 @@ namespace llvm {
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
index 657b1d4..094de5c 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -19,72 +19,72 @@
// Memory Access Instructions
//===----------------------------------------------------------------------===//
class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
- TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr),
+ TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>;
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
- TAI<op, (outs FGR32:$dst), (ins memri:$addr),
- !strconcat(instr_asm, " $dst, $addr"),
- [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>;
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
- TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr),
+ TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>;
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>;
class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
- TAI<op, (outs), (ins FGR32:$dst, memrr:$addr),
- !strconcat(instr_asm, " $dst, $addr"),
- [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>;
+ TB<op, (outs), (ins GPR:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>;
class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
- TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
InstrItinClass itin> :
- TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c),
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
[], itin>;
class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
- TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
- !strconcat(instr_asm, " $dst, $c, $b"),
- [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
-
-class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
- InstrItinClass itin> :
- TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b),
- !strconcat(instr_asm, " $dst, $b"),
- [], itin>;
-
-class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
- InstrItinClass itin> :
- TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b),
- !strconcat(instr_asm, " $dst, $b"),
- [], itin>;
-
-class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
- InstrItinClass itin> :
- TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b),
- !strconcat(instr_asm, " $dst, $b"),
- [], itin>;
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
class LogicF<bits<6> op, string instr_asm> :
- TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [],
- IIAlu>;
+ TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
class LogicFI<bits<6> op, string instr_asm> :
- TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [],
- IIAlu>;
+ TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+let rb=0 in {
+ class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+ class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+}
//===----------------------------------------------------------------------===//
// Pseudo instructions
@@ -94,24 +94,25 @@ class LogicFI<bits<6> op, string instr_asm> :
// FPU Arithmetic Instructions
//===----------------------------------------------------------------------===//
let Predicates=[HasFPU] in {
- def FOR : LogicF<0x28, "or ">;
def FORI : LogicFI<0x28, "ori ">;
def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>;
def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>;
def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>;
def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>;
+}
- def LWF : LoadFM<0x32, "lw ", load>;
- def LWFI : LoadFMI<0x32, "lwi ", load>;
+let Predicates=[HasFPU], isCodeGenOnly=1 in {
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x3A, "lwi ", load>;
- def SWF : StoreFM<0x32, "sw ", store>;
- def SWFI : StoreFMI<0x32, "swi ", store>;
+ def SWF : StoreFM<0x36, "sw ", store>;
+ def SWFI : StoreFMI<0x3E, "swi ", store>;
}
let Predicates=[HasFPU,HasSqrt] in {
def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>;
def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>;
- def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>;
}
let isAsCheapAsAMove = 1 in {
@@ -126,98 +127,98 @@ let isAsCheapAsAMove = 1 in {
let usesCustomInserter = 1 in {
- def Select_FCC : MBlazePseudo<(outs FGR32:$dst),
- (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC),
+ def Select_FCC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC),
"; SELECT_FCC PSEUDO!",
[]>;
}
// Floating point conversions
let Predicates=[HasFPU] in {
- def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>;
- def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>;
- def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>;
+ def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>;
+ def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>;
+ def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>;
}
// SET_CC operations
let Predicates=[HasFPU] in {
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_EQ FGR32:$L, FGR32:$R), 1)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (XOR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_NE FGR32:$L, FGR32:$R), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_GT FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_LT FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_GE FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (OR (FCMP_UN FGR32:$L, FGR32:$R),
- (FCMP_LE FGR32:$L, FGR32:$R)), 2)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETO),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_UN FGR32:$L, FGR32:$R), 1)>;
- def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (FCMP_UN FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_EQ GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (XOR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LT GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_GE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_LE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_EQ GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_NE GPR:$L, GPR:$R), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LT GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_GE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (OR (FCMP_UN GPR:$L, GPR:$R),
+ (FCMP_LE GPR:$L, GPR:$R)), 2)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 1)>;
+ def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (FCMP_UN GPR:$L, GPR:$R), 2)>;
}
// SELECT operations
-def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F),
- (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>;
+def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)),
+ (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>;
//===----------------------------------------------------------------------===//
// Patterns for Floating Point Instructions
//===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>;
+def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
index 5158411..3209845 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -10,144 +10,220 @@
//===----------------------------------------------------------------------===//
// FSL Instruction Formats
//===----------------------------------------------------------------------===//
-class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b),
- !strconcat(instr_asm, " $dst, $b"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>;
-
-class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> :
- TAI<op, (outs CPURegs:$dst), (ins fslimm:$b),
- !strconcat(instr_asm, " $dst, $b"),
- [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>;
-
-class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
- TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b),
- !strconcat(instr_asm, " $v, $b"),
- [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>;
-
-class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> :
- TAI<op, (outs), (ins CPURegs:$v, fslimm:$b),
- !strconcat(instr_asm, " $v, $b"),
- [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>;
-
-class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
- TA<op, flags, (outs), (ins CPURegs:$b),
- !strconcat(instr_asm, " $b"),
- [(OpNode CPURegs:$b)], IIAlu>;
-
-class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> :
- TAI<op, (outs), (ins fslimm:$b),
- !strconcat(instr_asm, " $b"),
- [(OpNode immZExt4:$b)], IIAlu>;
+class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu>
+{
+ bits<5> rd;
+ bits<4> fslno;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x0;
+ let Inst{17-21} = flags; // NCTAE
+ let Inst{22-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b))], IIAlu>
+{
+ bits<5> rd;
+ bits<5> rb;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x0;
+ let Inst{22-26} = flags; // NCTAE
+ let Inst{27-31} = 0x0;
+}
+
+class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, immZExt4:$b)], IIAlu>
+{
+ bits<5> ra;
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode GPR:$v, GPR:$b)], IIAlu>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
+
+class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIAlu>
+{
+ bits<4> fslno;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16} = 0x1;
+ let Inst{17-20} = flags; // NCTA
+ let Inst{21-27} = 0x0;
+ let Inst{28-31} = fslno;
+}
+
+class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
+ MBlazeInst<op, FCR, (outs), (ins GPR:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode GPR:$b)], IIAlu>
+{
+ bits<5> rb;
+
+ let Inst{6-10} = 0x0;
+ let Inst{11-15} = 0x0;
+ let Inst{16-20} = rb;
+ let Inst{21} = 0x1;
+ let Inst{22-25} = flags; // NCTA
+ let Inst{26-31} = 0x0;
+}
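For concreteness, the FSLGet layout above packs into a 32-bit word as sketched below (MSB-0 bit numbering, matching the Inst{...} slices). This is an illustrative C++ sketch, not code from the patch; the register and port values in the example are made up.

    // Sketch only: assemble an FSL "get" word per the FSLGet class above.
    // Inst{0-5}=opcode, Inst{6-10}=rd, Inst{17-21}=flags (NCTAE), Inst{28-31}=fslno.
    #include <cstdint>
    static uint32_t encodeFSLGet(unsigned opcode, unsigned rd,
                                 unsigned flags, unsigned fslno) {
      uint32_t Word = 0;
      Word |= (opcode & 0x3F) << 26;   // MSB-0 bits 0-5
      Word |= (rd     & 0x1F) << 21;   // MSB-0 bits 6-10
      Word |= (flags  & 0x1F) << 10;   // MSB-0 bits 17-21
      Word |= (fslno  & 0x0F);         // MSB-0 bits 28-31
      return Word;   // e.g. encodeFSLGet(0x1B, 5, 0x10, 2) for "nget r5, rfsl2"
    }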
//===----------------------------------------------------------------------===//
// FSL Get Instructions
//===----------------------------------------------------------------------===//
-def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>;
-def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>;
-def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>;
-def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>;
-def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>;
-def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>;
-def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>;
-def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>;
-def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>;
-def NAGET : FSLGet<0x1B, "naget ", int_mblaze_fsl_naget>;
-def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>;
-def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>;
-def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>;
-def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>;
-def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>;
-def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>;
-def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>;
-def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>;
-def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>;
-def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>;
-def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>;
-def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>;
-def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>;
-def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>;
-def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>;
-def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>;
-def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>;
-def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>;
-def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>;
-def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>;
-def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>;
-def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>;
+def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>;
+def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>;
+ def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>;
+ def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>;
+ def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>;
+ def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>;
+ def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>;
+ def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>;
+ def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>;
+ def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>;
+ def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>;
+ def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>;
+ def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>;
+ def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>;
+ def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>;
+ def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>;
+ def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>;
+}
//===----------------------------------------------------------------------===//
// FSL Dynamic Get Instructions
//===----------------------------------------------------------------------===//
-def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>;
-def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>;
-def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>;
-def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>;
-def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>;
-def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>;
-def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>;
-def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>;
-def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>;
-def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>;
-def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>;
-def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>;
-def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>;
-def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>;
-def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>;
-def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>;
-def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>;
-def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>;
-def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>;
-def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>;
-def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>;
-def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>;
-def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>;
-def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>;
-def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>;
-def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>;
-def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>;
-def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>;
-def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd ", int_mblaze_fsl_tneget>;
-def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>;
-def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>;
-def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>;
+def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x13, 0x0B, "ecagetd ", int_mblaze_fsl_ecaget>;
+def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>;
+
+let Defs = [CARRY] in {
+ def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>;
+ def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>;
+ def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>;
+ def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>;
+ def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>;
+ def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>;
+ def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>;
+ def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>;
+ def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>;
+ def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>;
+ def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>;
+ def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>;
+ def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>;
+ def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>;
+ def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>;
+ def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>;
+}
//===----------------------------------------------------------------------===//
// FSL Put Instructions
//===----------------------------------------------------------------------===//
-def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>;
-def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>;
-def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>;
-def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>;
-def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>;
-def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>;
-def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>;
-def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>;
-def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>;
-def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>;
-def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>;
-def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>;
-def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>;
-def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>;
-def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>;
-def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>;
+def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>;
+def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>;
+ def NAPUT : FSLPut<0x1B, 0x9, "naput ", int_mblaze_fsl_naput>;
+ def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>;
+ def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>;
+ def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>;
+ def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>;
+ def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>;
+ def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>;
+}
//===----------------------------------------------------------------------===//
// FSL Dynamic Put Instructions
//===----------------------------------------------------------------------===//
-def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>;
-def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>;
-def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>;
-def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>;
-def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>;
-def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>;
-def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>;
-def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>;
-def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>;
-def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>;
-def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>;
-def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>;
-def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>;
-def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>;
-def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>;
-def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>;
+def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>;
+def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>;
+
+let Defs = [CARRY] in {
+ def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>;
+ def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>;
+ def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>;
+ def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>;
+ def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>;
+ def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>;
+ def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>;
+ def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
index 28e8e44..d62574d 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -7,6 +7,35 @@
//
//===----------------------------------------------------------------------===//
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def FPseudo : Format<0>;
+def FRRR : Format<1>; // ADD, OR, etc.
+def FRRI : Format<2>; // ADDI, ORI, etc.
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
//
@@ -21,226 +50,155 @@
//===----------------------------------------------------------------------===//
// Generic MBlaze Format
-class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> : Instruction
-{
- field bits<32> Inst;
-
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
let Namespace = "MBlaze";
+ field bits<32> Inst;
- bits<6> opcode;
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
// Top 6 bits are the 'opcode' field
- let Inst{0-5} = opcode;
-
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+  // assembler and disassembler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
dag OutOperandList = outs;
dag InOperandList = ins;
let AsmString = asmstr;
let Pattern = pattern;
let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
+ let TSFlags{5-0} = FormBits;
}
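The "kept in sync" comment above is the contract satisfied by the MBlazeII enum added to MBlazeInstrInfo.h later in this patch (FPseudo through FC, plus FormMask = 63). A consumer of TSFlags{5-0} could recover the format roughly as below; getInstrForm is a hypothetical helper shown only for illustration.

    // Sketch only: read back the Format value stored in TSFlags{5-0}.
    #include "MBlazeInstrInfo.h"                // for llvm::MBlazeII::FormMask
    #include "llvm/CodeGen/MachineInstr.h"
    static unsigned getInstrForm(const llvm::MachineInstr &MI) {
      // e.g. returns llvm::MBlazeII::FRRR for an ADD instruction.
      return MI.getDesc().TSFlags & llvm::MBlazeII::FormMask;
    }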
//===----------------------------------------------------------------------===//
// Pseudo instruction class
//===----------------------------------------------------------------------===//
class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>;
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>;
//===----------------------------------------------------------------------===//
// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
//===----------------------------------------------------------------------===//
class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin>
{
bits<5> rd;
bits<5> ra;
bits<5> rb;
- let opcode = op;
-
let Inst{6-10} = rd;
- let Inst{11-15} = ra;
+ let Inst{11-15} = ra;
let Inst{16-20} = rb;
let Inst{21-31} = flags;
}
-class TAI<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> rd;
- bits<5> ra;
- bits<16> imm16;
-
- let opcode = op;
-
- let Inst{6-10} = rd;
- let Inst{11-15} = ra;
- let Inst{16-31} = imm16;
-}
-
-class TIMM<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
- bits<16> imm16;
-
- let opcode = op;
-
- let Inst{6-15} = 0;
- let Inst{16-31} = imm16;
-}
-
-class TADDR<bits<6> op, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<26> addr;
-
- let opcode = op;
-
- let Inst{6-31} = addr;
-}
-
//===----------------------------------------------------------------------===//
// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
//===----------------------------------------------------------------------===//
class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
+ InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin>
{
bits<5> rd;
bits<5> ra;
bits<16> imm16;
- let opcode = op;
-
let Inst{6-10} = rd;
- let Inst{11-15} = ra;
+ let Inst{11-15} = ra;
let Inst{16-31} = imm16;
}
//===----------------------------------------------------------------------===//
-// Float instruction class in MBlaze : <|opcode|rd|ra|flags|>
+// Type A instruction class in MBlaze but with the operands reversed
+// in the LLVM DAG : <|opcode|rd|ra|rb|flags|>
//===----------------------------------------------------------------------===//
-class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
- list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
+class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ TA<op, flags, outs, ins, asmstr, pattern, itin>
{
- bits<5> rd;
- bits<5> ra;
+ bits<5> rrd;
+ bits<5> rrb;
+ bits<5> rra;
- let opcode = op;
+ let Form = FRRRR;
- let Inst{6-10} = rd;
- let Inst{11-15} = ra;
- let Inst{16-20} = 0;
- let Inst{21-31} = flags;
+ let rd = rrd;
+ let ra = rra;
+ let rb = rrb;
}
//===----------------------------------------------------------------------===//
-// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|>
+// Type B instruction class in MBlaze but with the operands reversed in
+// the LLVM DAG : <|opcode|rd|ra|immediate|>
//===----------------------------------------------------------------------===//
-
-class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
-
- let opcode = op;
-
- let Inst{6-10} = 0;
- let Inst{11-15} = br;
- let Inst{16-20} = ra;
- let Inst{21-31} = flags;
-}
-
-class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
- bits<5> rb;
-
- let opcode = op;
-
- let Inst{6-10} = br;
- let Inst{11-15} = ra;
- let Inst{16-20} = rb;
- let Inst{21-31} = flags;
-}
-
-class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
-
- let opcode = op;
-
- let Inst{6-10} = 0xF;
- let Inst{11-15} = br;
- let Inst{16-20} = ra;
- let Inst{21-31} = flags;
+class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ TB<op, outs, ins, asmstr, pattern, itin> {
+ bits<5> rrd;
+ bits<16> rimm16;
+ bits<5> rra;
+
+ let Form = FRIR;
+
+ let rd = rrd;
+ let ra = rra;
+ let imm16 = rimm16;
}
-class TBRI<bits<6> op, bits<5> br, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<16> imm16;
-
- let opcode = op;
-
- let Inst{6-10} = 0;
- let Inst{11-15} = br;
- let Inst{16-31} = imm16;
-}
-
-class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<16> imm16;
-
- let opcode = op;
+//===----------------------------------------------------------------------===//
+// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> imm5;
- let Inst{6-10} = 0xF;
- let Inst{11-15} = br;
- let Inst{16-31} = imm16;
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0x0;
+ let Inst{21-22} = flags;
+ let Inst{23-26} = 0x0;
+ let Inst{27-31} = imm5;
}
-class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
- bits<16> imm16;
-
- let opcode = op;
+//===----------------------------------------------------------------------===//
+// Special instruction class in MBlaze : <|opcode|rd|imm14|>
+//===----------------------------------------------------------------------===//
+class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<14> imm14;
- let Inst{6-10} = br;
- let Inst{11-15} = ra;
- let Inst{16-31} = imm16;
+ let Inst{6-10} = rd;
+ let Inst{11-15} = 0x0;
+ let Inst{16-17} = flags;
+ let Inst{18-31} = imm14;
}
-class TRET<bits<6> op, dag outs, dag ins,
- string asmstr, list<dag> pattern, InstrItinClass itin> :
- MBlazeInst<outs, ins, asmstr, pattern, itin>
-{
- bits<5> ra;
- bits<16> imm16;
-
- let opcode = op;
+//===----------------------------------------------------------------------===//
+// MSR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<15> imm15;
- let Inst{6-10} = 0x10;
- let Inst{11-15} = ra;
- let Inst{16-31} = imm16;
+ let Inst{6-10} = rd;
+ let Inst{11-16} = flags;
+ let Inst{17-31} = imm15;
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b590c09..b353dcd 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -38,10 +38,10 @@ static bool isZeroImm(const MachineOperand &op) {
unsigned MBlazeInstrInfo::
isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
if (MI->getOpcode() == MBlaze::LWI) {
- if ((MI->getOperand(2).isFI()) && // is a stack slot
- (MI->getOperand(1).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(1)))) {
- FrameIndex = MI->getOperand(2).getIndex();
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
}
@@ -57,10 +57,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
unsigned MBlazeInstrInfo::
isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
if (MI->getOpcode() == MBlaze::SWI) {
- if ((MI->getOperand(2).isFI()) && // is a stack slot
- (MI->getOperand(1).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(1)))) {
- FrameIndex = MI->getOperand(2).getIndex();
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
}
@@ -80,7 +80,7 @@ copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
+ llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
}
@@ -91,7 +91,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
- .addImm(0).addFrameIndex(FI);
+    .addFrameIndex(FI).addImm(0);
}
void MBlazeInstrInfo::
@@ -101,21 +101,168 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
- .addImm(0).addFrameIndex(FI);
+    .addFrameIndex(FI).addImm(0);
}
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
+bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (MBlaze::isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (MBlaze::isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with something like BEQID then BRID, handle it.
+ if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it.
+ // The second one is not executed, so remove it.
+ if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) &&
+ MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
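The condition vector built above is what the InsertBranch and ReverseBranchCondition hooks below consume: Cond[0] carries the conditional branch opcode as an immediate and Cond[1] the register it tests. A minimal caller-side sketch (not part of the patch) would look like this:

    // Sketch only: query the MBlaze branch analysis from generic code.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Target/TargetInstrInfo.h"
    static bool endsInAnalyzableCondBranch(llvm::MachineBasicBlock &MBB,
                                           const llvm::TargetInstrInfo &TII) {
      llvm::MachineBasicBlock *TBB = 0, *FBB = 0;
      llvm::SmallVector<llvm::MachineOperand, 4> Cond;
      if (TII.AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
        return false;               // terminators not understood
      return Cond.size() == 2;      // Cond[0]=branch opcode, Cond[1]=tested register
    }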
+
unsigned MBlazeInstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
- // Can only insert uncond branches so far.
- assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB);
- return 1;
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "MBlaze branch conditions have two components!");
+
+ unsigned Opc = MBlaze::BRID;
+ if (!Cond.empty())
+ Opc = (unsigned)Cond[0].getImm();
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+
+ if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) &&
+ !MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!MBlaze::isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!");
+ switch (Cond[0].getImm()) {
+ default: return true;
+ case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false;
+ case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false;
+ case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false;
+ case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false;
+ case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false;
+ case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false;
+ case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false;
+ case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false;
+ case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false;
+ case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false;
+ case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false;
+ case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false;
+ case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false;
+ case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false;
+ case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false;
+ case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false;
+ case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false;
+ case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false;
+ case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false;
+ case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false;
+ case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false;
+ case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false;
+ case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false;
+ case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false;
+ }
}
/// getGlobalBaseReg - Return a virtual register initialized with the
@@ -134,7 +281,7 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass);
BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
GlobalBaseReg).addReg(MBlaze::R20);
RegInfo.addLiveIn(MBlaze::R20);
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
index b3dba0e..b7300c1 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -73,59 +73,92 @@ namespace MBlaze {
FCOND_GT,
// Only integer conditions
- COND_E,
- COND_GZ,
- COND_GEZ,
- COND_LZ,
- COND_LEZ,
+ COND_EQ,
+ COND_GT,
+ COND_GE,
+ COND_LT,
+ COND_LE,
COND_NE,
COND_INVALID
};
// Turn condition code into conditional branch opcode.
- unsigned GetCondBranchFromCond(CondCode CC);
+ inline static unsigned GetCondBranchFromCond(CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case COND_EQ: return MBlaze::BEQID;
+ case COND_NE: return MBlaze::BNEID;
+ case COND_GT: return MBlaze::BGTID;
+ case COND_GE: return MBlaze::BGEID;
+ case COND_LT: return MBlaze::BLTID;
+ case COND_LE: return MBlaze::BLEID;
+ }
+ }
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
- CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+ // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
/// MBlazeCCToString - Map each FP condition code to its string
- inline static const char *MBlazeFCCToString(MBlaze::CondCode CC)
- {
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case FCOND_F:
- case FCOND_T: return "f";
- case FCOND_UN:
- case FCOND_OR: return "un";
- case FCOND_EQ:
- case FCOND_NEQ: return "eq";
- case FCOND_UEQ:
- case FCOND_OGL: return "ueq";
- case FCOND_OLT:
- case FCOND_UGE: return "olt";
- case FCOND_ULT:
- case FCOND_OGE: return "ult";
- case FCOND_OLE:
- case FCOND_UGT: return "ole";
- case FCOND_ULE:
- case FCOND_OGT: return "ule";
- case FCOND_SF:
- case FCOND_ST: return "sf";
- case FCOND_NGLE:
- case FCOND_GLE: return "ngle";
- case FCOND_SEQ:
- case FCOND_SNE: return "seq";
- case FCOND_NGL:
- case FCOND_GL: return "ngl";
- case FCOND_LT:
- case FCOND_NLT: return "lt";
- case FCOND_NGE:
- case FCOND_GE: return "ge";
- case FCOND_LE:
- case FCOND_NLE: return "nle";
- case FCOND_NGT:
- case FCOND_GT: return "gt";
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+
+ inline static bool isUncondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BRI:
+ case MBlaze::BRAI:
+ case MBlaze::BRID:
+ case MBlaze::BRAID:
+ return true;
+ }
+ }
+
+ inline static bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ default: return false;
+ case MBlaze::BEQI: case MBlaze::BEQID:
+ case MBlaze::BNEI: case MBlaze::BNEID:
+ case MBlaze::BGTI: case MBlaze::BGTID:
+ case MBlaze::BGEI: case MBlaze::BGEID:
+ case MBlaze::BLTI: case MBlaze::BLTID:
+ case MBlaze::BLEI: case MBlaze::BLEID:
+ return true;
}
}
}
@@ -134,29 +167,54 @@ namespace MBlaze {
/// instruction info tracks.
///
namespace MBlazeII {
- /// Target Operand Flag enum.
- enum TOF {
+ enum {
+ // PseudoFrm - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ FPseudo = 0,
+ FRRR,
+ FRRI,
+ FCRR,
+ FCRI,
+ FRCR,
+ FRCI,
+ FCCR,
+ FCCI,
+ FRRCI,
+ FRRC,
+ FRCX,
+ FRCS,
+ FCRCS,
+ FCRCX,
+ FCX,
+ FCR,
+ FRIR,
+ FRRRR,
+ FRI,
+ FC,
+ FormMask = 63
+
//===------------------------------------------------------------------===//
// MBlaze Specific MachineOperand flags.
- MO_NO_FLAG,
+ // MO_NO_FLAG,
/// MO_GOT - Represents the offset into the global offset table at which
/// the address the relocation entry symbol resides during execution.
- MO_GOT,
+ // MO_GOT,
/// MO_GOT_CALL - Represents the offset into the global offset table at
/// which the address of a call site relocation entry symbol resides
/// during execution. This is different from the above since this flag
/// can only be present in call instructions.
- MO_GOT_CALL,
+ // MO_GOT_CALL,
/// MO_GPREL - Represents the offset from the current gp value to be used
/// for the relocatable object file being produced.
- MO_GPREL,
+ // MO_GPREL,
/// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
/// address.
- MO_ABS_HILO
+ // MO_ABS_HILO
};
}
@@ -190,10 +248,20 @@ public:
int &FrameIndex) const;
/// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const;
+
+
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
index e5d1534..7b8f70a 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -13,35 +13,36 @@
include "MBlazeInstrFormats.td"
//===----------------------------------------------------------------------===//
-// MBlaze profiles and nodes
+// MBlaze type profiles
//===----------------------------------------------------------------------===//
+
+// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-// Call
-def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
- [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
+//===----------------------------------------------------------------------===//
+// MBlaze specific nodes
+//===----------------------------------------------------------------------===//
-// Return
-def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
- [SDNPHasChain, SDNPOptInFlag]>;
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet,
+ [SDNPHasChain, SDNPOptInGlue]>;
-// Hi and Lo nodes are used to handle global addresses. Used on
-// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol
-// static model.
-def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
-def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>;
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue,
+ SDNPVariadic]>;
-def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
-def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
-// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
-def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// MBlaze Instruction Predicate Definitions.
@@ -67,11 +68,22 @@ def HasMMU : Predicate<"Subtarget.hasMMU()">;
// MBlaze Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
+def MBlazeMemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
+ let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+ let Name = "Fsl";
+ let SuperClasses = [];
+}
+
// Instruction operand types
def brtarget : Operand<OtherVT>;
def calltarget : Operand<i32>;
def simm16 : Operand<i32>;
def uimm5 : Operand<i32>;
+def uimm15 : Operand<i32>;
def fimm : Operand<f32>;
// Unsigned Operand
@@ -82,31 +94,23 @@ def uimm16 : Operand<i32> {
// FSL Operand
def fslimm : Operand<i32> {
let PrintMethod = "printFSLImm";
+ let ParserMatchClass = MBlazeFslAsmOperand;
}
// Address operand
def memri : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops simm16, CPURegs);
+ let MIOperandInfo = (ops GPR, simm16);
+ let ParserMatchClass = MBlazeMemAsmOperand;
}
def memrr : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops CPURegs, CPURegs);
+ let MIOperandInfo = (ops GPR, GPR);
+ let ParserMatchClass = MBlazeMemAsmOperand;
}
-// Transformation Function - get the lower 16 bits.
-def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
-}]>;
-
-// Transformation Function - get the higher 16 bits.
-def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
// Node immediate fits as 16-bit sign extended on target immediate.
-// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{
return (N->getZExtValue() >> 16) == 0;
}]>;
@@ -117,19 +121,19 @@ def immSExt16 : PatLeaf<(imm), [{
// e.g. addiu, sltiu
def immZExt16 : PatLeaf<(imm), [{
return (N->getZExtValue() >> 16) == 0;
-}], LO16>;
+}]>;
// FSL immediate field must fit in 4 bits.
def immZExt4 : PatLeaf<(imm), [{
- return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+ return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
}]>;
// shamt field must fit in 5 bits.
def immZExt5 : PatLeaf<(imm), [{
- return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
}]>;
-// MBlaze Address Mode! SDNode frameindex could possibily be a match
+// MBlaze Address Mode. SDNode frameindex could possibly be a match
// since load and store instructions from stack used it.
def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
@@ -141,28 +145,14 @@ def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
// As stack alignment is always done with addiu, we need a 16-bit immediate
let Defs = [R1], Uses = [R1] in {
def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
- "${:comment} ADJCALLSTACKDOWN $amt",
+ "#ADJCALLSTACKDOWN $amt",
[(callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : MBlazePseudo<(outs),
(ins uimm16:$amt1, simm16:$amt2),
- "${:comment} ADJCALLSTACKUP $amt1",
+ "#ADJCALLSTACKUP $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// Some assembly macros need to avoid pseudoinstructions and assembler
-// automatic reodering, we should reorder ourselves.
-def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>;
-def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>;
-def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>;
-def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>;
-
-// When handling PIC code the assembler needs .cpload and .cprestore
-// directives. If the real instructions corresponding these directives
-// are used, we have the same behavior, but get also a bunch of warnings
-// from the assembler.
-def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>;
-def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
-
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -172,47 +162,58 @@ def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
//===----------------------------------------------------------------------===//
class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
- TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+
+class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
- !strconcat(instr_asm, " $dst, $c, $b"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
- TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c),
+ TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $c, $b"),
- [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>;
class ArithN<bits<6> op, bits<11> flags, string instr_asm,
InstrItinClass itin> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
[], itin>;
class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
- TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
InstrItinClass itin> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [], itin>;
+ TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
- TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b),
+ TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
!strconcat(instr_asm, " $dst, $b, $c"),
[], IIAlu>;
@@ -221,135 +222,179 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
//===----------------------------------------------------------------------===//
class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
- TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>;
class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
- TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))],
- IIAlu>;
-
-class EffectiveAddress<string instr_asm> :
- TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr),
- instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>;
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
+ IIAlu>;
+
+class LogicI32<bits<6> op, string instr_asm> :
+ TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
//===----------------------------------------------------------------------===//
// Memory Access Instructions
//===----------------------------------------------------------------------===//
-class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> :
- TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr),
+class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>;
+ [], IILoad>;
class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
- TAI<op, (outs CPURegs:$dst), (ins memri:$addr),
- !strconcat(instr_asm, " $dst, $addr"),
- [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>;
+ TB<op, (outs GPR:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
-class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> :
- TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr),
+class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>;
+ [], IIStore>;
class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
- TAI<op, (outs), (ins CPURegs:$dst, memri:$addr),
- !strconcat(instr_asm, " $dst, $addr"),
- [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>;
+ TB<op, (outs), (ins GPR:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>;
//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//
class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
- TBR<op, br, flags, (outs), (ins CPURegs:$target),
- !strconcat(instr_asm, " $target"),
- [(brind CPURegs:$target)], IIBranch>;
+ TA<op, flags, (outs), (ins GPR:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIBranch> {
+ let rd = 0x0;
+ let ra = br;
+ let Form = FCCR;
+}
-class BranchI<bits<6> op, bits<5> brf, string instr_asm> :
- TBRI<op, brf, (outs), (ins brtarget:$target),
- !strconcat(instr_asm, " $target"),
- [(br bb:$target)], IIBranch>;
+class BranchI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [], IIBranch> {
+ let rd = 0;
+ let ra = br;
+ let Form = FCCI;
+}
//===----------------------------------------------------------------------===//
// Branch and Link Instructions
//===----------------------------------------------------------------------===//
class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
- TBRL<op, br, flags, (outs), (ins CPURegs:$target),
- !strconcat(instr_asm, " r15, $target"),
- [], IIBranch>;
+ TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIBranch> {
+ let ra = br;
+ let Form = FRCR;
+}
class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
- TBRLI<op, br, (outs), (ins calltarget:$target),
- !strconcat(instr_asm, " r15, $target"),
- [], IIBranch>;
+ TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
+ !strconcat(instr_asm, " $link, $target"),
+ [], IIBranch> {
+ let ra = br;
+ let Form = FRCI;
+}
//===----------------------------------------------------------------------===//
// Conditional Branch Instructions
//===----------------------------------------------------------------------===//
-class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm,
- PatFrag cond_op> :
- TBRC<op, br, flags, (outs),
- (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
- !strconcat(instr_asm, " $a, $b, $offset"),
- [], IIBranch>;
- //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
- //IIBranch>;
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TA<op, flags, (outs),
+ (ins GPR:$a, GPR:$b),
+ !strconcat(instr_asm, " $a, $b"),
+ [], IIBranch> {
+ let rd = br;
+ let Form = FCRR;
+}
-class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> :
- TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset),
- !strconcat(instr_asm, " $a, $offset"),
- [], IIBranch>;
+class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
+ TB<op, (outs), (ins GPR:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIBranch> {
+ let rd = br;
+ let Form = FCRI;
+}
//===----------------------------------------------------------------------===//
// MBlaze arithmetic instructions
//===----------------------------------------------------------------------===//
let isCommutable = 1, isAsCheapAsAMove = 1 in {
- def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>;
- def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>;
- def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>;
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIAlu>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+
+ let Defs = [CARRY] in {
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIAlu>;
+
+ let Uses = [CARRY] in {
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>;
+ }
+ }
+
+ let Uses = [CARRY] in {
def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>;
- def AND : Logic<0x21, 0x000, "and ", and>;
- def OR : Logic<0x20, 0x000, "or ", or>;
- def XOR : Logic<0x22, 0x000, "xor ", xor>;
+ }
}
let isAsCheapAsAMove = 1 in {
- def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>;
- def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>;
- def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>;
- def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>;
- def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>;
- def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>;
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIAlu>;
+
+ let Defs = [CARRY] in {
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIAlu>;
+
+ let Uses = [CARRY] in {
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>;
+ }
+ }
+
+ let Uses = [CARRY] in {
def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+ }
}
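+
+// Note: ADD and RSUB are selected for the carry-producing addc/subc nodes and
+// define CARRY; the "C" forms (ADDC/RSUBC) also read CARRY and map to the
+// carry-consuming adde/sube nodes, while ADDK/RSUBK map to the plain add/sub
+// nodes and do not touch CARRY.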
let isCommutable = 1, Predicates=[HasMul] in {
- def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>;
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>;
}
let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
- def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>;
- def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>;
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>;
}
let Predicates=[HasMul,HasMul64] in {
- def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
}
let Predicates=[HasBarrel] in {
- def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>;
- def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>;
- def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>;
- def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>;
- def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>;
- def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>;
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>;
+ def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
}
let Predicates=[HasDiv] in {
- def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>;
- def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>;
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIAlu>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIAlu>;
}
//===----------------------------------------------------------------------===//
@@ -357,22 +402,31 @@ let Predicates=[HasDiv] in {
//===----------------------------------------------------------------------===//
let isAsCheapAsAMove = 1 in {
- def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>;
- def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>;
- def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>;
- def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
- def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>;
- def RSUBIC : ArithRNI<0x0B, "rsubi ", simm16, immSExt16>;
- def RSUBIK : ArithRNI<0x0E, "rsubic ", simm16, immSExt16>;
- def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
- def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
- def ANDI : LogicI<0x29, "andi ", and>;
- def ORI : LogicI<0x28, "ori ", or>;
- def XORI : LogicI<0x2A, "xori ", xor>;
+ def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>;
+ def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+
+ let Defs = [CARRY] in {
+ def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>;
+
+ let Uses = [CARRY] in {
+ def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>;
+ def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>;
+ }
+ }
+
+ let Uses = [CARRY] in {
+ def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>;
+ def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>;
+ }
}
let Predicates=[HasMul] in {
- def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
}
//===----------------------------------------------------------------------===//
@@ -380,290 +434,445 @@ let Predicates=[HasMul] in {
//===----------------------------------------------------------------------===//
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def LBU : LoadM<0x30, "lbu ", zextloadi8>;
- def LHU : LoadM<0x31, "lhu ", zextloadi16>;
- def LW : LoadM<0x32, "lw ", load>;
+ def LBU : LoadM<0x30, 0x000, "lbu ">;
+ def LBUR : LoadM<0x30, 0x200, "lbur ">;
+
+ def LHU : LoadM<0x31, 0x000, "lhu ">;
+ def LHUR : LoadM<0x31, 0x200, "lhur ">;
+
+ def LW : LoadM<0x32, 0x000, "lw ">;
+ def LWR : LoadM<0x32, 0x200, "lwr ">;
- def LBUI : LoadMI<0x30, "lbui ", zextloadi8>;
- def LHUI : LoadMI<0x31, "lhui ", zextloadi16>;
- def LWI : LoadMI<0x32, "lwi ", load>;
+ let Defs = [CARRY] in {
+ def LWX : LoadM<0x32, 0x400, "lwx ">;
+ }
+
+ def LBUI : LoadMI<0x38, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x39, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x3A, "lwi ", load>;
}
- def SB : StoreM<0x34, "sb ", truncstorei8>;
- def SH : StoreM<0x35, "sh ", truncstorei16>;
- def SW : StoreM<0x36, "sw ", store>;
+def SB : StoreM<0x34, 0x000, "sb ">;
+def SBR : StoreM<0x34, 0x200, "sbr ">;
+
+def SH : StoreM<0x35, 0x000, "sh ">;
+def SHR : StoreM<0x35, 0x200, "shr ">;
+
+def SW : StoreM<0x36, 0x000, "sw ">;
+def SWR : StoreM<0x36, 0x200, "swr ">;
- def SBI : StoreMI<0x34, "sbi ", truncstorei8>;
- def SHI : StoreMI<0x35, "shi ", truncstorei16>;
- def SWI : StoreMI<0x36, "swi ", store>;
+let Defs = [CARRY] in {
+ def SWX : StoreM<0x36, 0x400, "swx ">;
+}
+
+def SBI : StoreMI<0x3C, "sbi ", truncstorei8>;
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>;
+def SWI : StoreMI<0x3E, "swi ", store>;
//===----------------------------------------------------------------------===//
// MBlaze branch instructions
//===----------------------------------------------------------------------===//
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+}
+
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
- def BRI : BranchI<0x2E, 0x00, "bri ">;
- def BRAI : BranchI<0x2E, 0x08, "brai ">;
- def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>;
- def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>;
- def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>;
- def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>;
- def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>;
- def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>;
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ">;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ">;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ">;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ">;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ">;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
}
let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
- def BR : Branch<0x26, 0x00, 0x000, "br ">;
- def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
- def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>;
- def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>;
- def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>;
- def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>;
- def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>;
- def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>;
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ">;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ">;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ">;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ">;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ isBarrier = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
}
let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
- def BRID : BranchI<0x2E, 0x10, "brid ">;
- def BRAID : BranchI<0x2E, 0x18, "braid ">;
- def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>;
- def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>;
- def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>;
- def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>;
- def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>;
- def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>;
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ">;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ">;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ">;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ">;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ">;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ">;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
}
let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
hasDelaySlot = 1, hasCtrlDep = 1 in {
- def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
- def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
- def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>;
- def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>;
- def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>;
- def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>;
- def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>;
- def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>;
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ">;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ">;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ">;
+}
+
+let isCall =1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY],
+ Uses = [R1] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
}
-let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1,
- Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
- Uses = [R1,R5,R6,R7,R8,R9,R10] in {
- def BRL : BranchL<0x26, 0x04, 0x000, "brl ">;
- def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x10, Form=FCRI in {
+ def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtsd $target, $imm",
+ [],
+ IIBranch>;
}
-let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1,
- Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
- Uses = [R1,R5,R6,R7,R8,R9,R10] in {
- def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
- def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x11, Form=FCRI in {
+ def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtid $target, $imm",
+ [],
+ IIBranch>;
}
-let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1,
- Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
- Uses = [R1,R5,R6,R7,R8,R9,R10] in {
- def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
- def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x12, Form=FCRI in {
+ def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rtbd $target, $imm",
+ [],
+ IIBranch>;
}
-let isReturn=1, isTerminator=1, hasDelaySlot=1,
- isBarrier=1, hasCtrlDep=1, imm16=0x8 in {
- def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target),
- "rtsd $target, 8",
- [(MBlazeRet CPURegs:$target)],
- IIBranch>;
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
+ rd=0x14, Form=FCRI in {
+ def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
+ "rted $target, $imm",
+ [],
+ IIBranch>;
}
//===----------------------------------------------------------------------===//
// MBlaze misc instructions
//===----------------------------------------------------------------------===//
-let addr = 0 in {
- def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>;
+let neverHasSideEffects = 1 in {
+ def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIAlu>;
}
let usesCustomInserter = 1 in {
- //class PseudoSelCC<RegisterClass RC, string asmstr>:
- // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr,
- // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>;
- //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">;
-
- def Select_CC : MBlazePseudo<(outs CPURegs:$dst),
- (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC),
+ def Select_CC : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
"; SELECT_CC PSEUDO!",
[]>;
- def ShiftL : MBlazePseudo<(outs CPURegs:$dst),
- (ins CPURegs:$L, CPURegs:$R),
+ def ShiftL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
"; ShiftL PSEUDO!",
[]>;
- def ShiftRA : MBlazePseudo<(outs CPURegs:$dst),
- (ins CPURegs:$L, CPURegs:$R),
+ def ShiftRA : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
"; ShiftRA PSEUDO!",
[]>;
- def ShiftRL : MBlazePseudo<(outs CPURegs:$dst),
- (ins CPURegs:$L, CPURegs:$R),
+ def ShiftRL : MBlazePseudo<(outs GPR:$dst),
+ (ins GPR:$L, GPR:$R),
"; ShiftRL PSEUDO!",
[]>;
}
-
let rb = 0 in {
- def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src),
- "sext16 $dst, $src", [], IIAlu>;
- def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src),
- "sext8 $dst, $src", [], IIAlu>;
- def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src),
- "srl $dst, $src", [], IIAlu>;
- def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src),
- "sra $dst, $src", [], IIAlu>;
- def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src),
- "src $dst, $src", [], IIAlu>;
+ def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
+ "sext16 $dst, $src", [], IIAlu>;
+ def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
+ "sext8 $dst, $src", [], IIAlu>;
+ let Defs = [CARRY] in {
+ def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
+ "srl $dst, $src", [], IIAlu>;
+ def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
+ "sra $dst, $src", [], IIAlu>;
+ let Uses = [CARRY] in {
+ def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
+ "src $dst, $src", [], IIAlu>;
+ }
+ }
+}
+
+let isCodeGenOnly=1 in {
+ def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>;
+ def ORI32 : LogicI32<0x28, "ori ">;
+ def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">;
+}
+
+//===----------------------------------------------------------------------===//
+// Misc. instructions
+//===----------------------------------------------------------------------===//
+let Form=FRCS in {
+ def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
+ "mfs $dst, $src", [], IIAlu>;
+}
+
+let Form=FCRCS in {
+ def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
+ "mts $dst, $src", [], IIAlu>;
+}
+
+def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
+ "msrset $dst, $set", [], IIAlu>;
+
+def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
+ "msrclr $dst, $clr", [], IIAlu>;
+
+let rd=0x0, Form=FCRR in {
+ def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
+ "wdc $a, $b", [], IIAlu>;
+ def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.flush $a, $b", [], IIAlu>;
+ def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
+ "wdc.clear $a, $b", [], IIAlu>;
+ def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
+ "wic $a, $b", [], IIAlu>;
}
-def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">;
+def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
+def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
+
+def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
+ "imm $imm", [], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions for atomic operations
+//===----------------------------------------------------------------------===//
+let usesCustomInserter=1 in {
+ def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp),
+ "# atomic compare and swap",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>;
+
+ def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp),
+ "# atomic swap",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>;
+
+ def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and add",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and sub",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and and",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and or",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and xor",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>;
+
+ def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val),
+ "# atomic load and nand",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
+
+ def MEMBARRIER : MBlazePseudo<(outs), (ins),
+ "# memory barrier",
+ [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+}
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
// Small immediates
-def : Pat<(i32 0), (ADD R0, R0)>;
-def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>;
-def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>;
+def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>;
+def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>;
// Arbitrary immediates
-def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>;
+def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>;
// In register sign extension
-def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>;
-def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>;
+def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>;
+def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>;
// Call
-def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>;
-def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>;
-def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>;
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)),
+ (BRLID (i32 R15), tglobaladdr:$dst)>;
+
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),
+ (BRLID (i32 R15), texternalsym:$dst)>;
+
+def : Pat<(MBlazeJmpLink GPR:$dst),
+ (BRALD (i32 R15), GPR:$dst)>;
// Shift Instructions
-def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>;
-def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>;
-def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>;
+def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>;
+def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
+def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE),
- (Select_CC (ADDI R0, 1), (ADDI R0, 0),
- (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, GPR:$L), 6)>;
// SELECT operations
-def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F),
- (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>;
-
-// SELECT_CC
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>;
-def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE),
- (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
+ (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>;
+
+// Ret instructions
+def : Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>;
+def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>;
+
+// BR instructions
+def : Pat<(br bb:$T), (BRID bb:$T)>;
+def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T),
- (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T),
- (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T),
- (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T),
- (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T),
- (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T),
- (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T),
- (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T),
- (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T),
- (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T),
- (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
-def : Pat<(brcond CPURegs:$C, bb:$T),
- (BNEID CPURegs:$C, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>;
+def : Pat<(brcond (i32 GPR:$C), bb:$T),
+ (BNEID GPR:$C, bb:$T)>;
// Jump tables, global addresses, and constant pools
-def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
-def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>;
-def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>;
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
// Misc instructions
-def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>;
+def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
// Arithmetic with immediates
-def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>;
-def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>;
-def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>;
-
-// extended load and stores
-def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>;
-def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>;
-def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>;
-def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>;
-def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>;
-
-def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
-def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
-def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>;
-def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
-def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
-def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>;
-
-// peepholes
-def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>;
+def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>;
+def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>;
+def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>;
+
+// Convert any extend loads into zero extend loads
+def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
+def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
+def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>;
+def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>;
+
+// 32-bit load and store
+def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
+def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
+
+// 16-bit load and store
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
+
+// 8-bit load and store
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
+
+// Peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>;
//===----------------------------------------------------------------------===//
// Floating Point Support
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 4931860..7e4a2f5 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -48,7 +48,7 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
if (IntrID < Intrinsic::num_intrinsics)
return 0;
- assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
"Invalid intrinsic ID");
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
@@ -94,12 +94,12 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) {
const Type *ResultTy = NULL;
std::vector<const Type*> ArgTys;
bool IsVarArg = false;
-
+
#define GET_INTRINSIC_GENERATOR
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_GENERATOR
- return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
}
Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
index a27cb5b..278afbe 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -1,10 +1,10 @@
//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines all of the MicroBlaze-specific intrinsics.
@@ -16,7 +16,7 @@
//
// MBlaze intrinsic classes.
-let TargetPrefix = "mblaze", isTarget = 1 in {
+let TargetPrefix = "mblaze", isTarget = 1 in {
class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
index 4abeb2e..1467141 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -14,14 +14,9 @@
#include "MBlazeMCAsmInfo.h"
using namespace llvm;
-MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) {
+MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ SupportsDebugInformation = true;
AlignmentIsInBytes = false;
- Data16bitsDirective = "\t.half\t";
- Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = 0;
PrivateGlobalPrefix = "$";
- CommentString = "#";
- ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
- HasSetDirective = false;
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.h
index 9d6ff3a..e68dd58 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -19,10 +19,10 @@
namespace llvm {
class Target;
-
+
class MBlazeMCAsmInfo : public MCAsmInfo {
public:
- explicit MBlazeMCAsmInfo(const Target &T, StringRef TT);
+ explicit MBlazeMCAsmInfo();
};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
new file mode 100644
index 0000000..3ece1a8
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -0,0 +1,223 @@
+//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlazeMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MBlaze.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class MBlazeMCCodeEmitter : public MCCodeEmitter {
+ MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ MCContext &Ctx;
+
+public:
+ MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+ : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
+ }
+
+ ~MBlazeMCCodeEmitter() {}
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const;
+ unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ static unsigned GetMBlazeRegNum(const MCOperand &MO) {
+ // FIXME: getMBlazeRegisterNumbering() is sufficient?
+ assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
+ return 0;
+ }
+
+ void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ // The MicroBlaze uses a bit reversed format so we need to reverse the
+ // order of the bits. Taken from:
+ // http://graphics.stanford.edu/~seander/bithacks.html
+ C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
+
+ OS << (char)C;
+ ++CurByte;
+ }
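+  // For example, EmitByte(0xB0, ...) writes 0x0D: the multiply/mask/shift
+  // sequence reverses the bit order of the byte (1011 0000 -> 0000 1101).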
+
+ void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
+ OS << (char)C;
+ ++CurByte;
+ }
+
+ void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+ raw_ostream &OS) const {
+ assert(Size <= 8 && "size too big in emit constant");
+
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, CurByte, OS);
+ Val >>= 8;
+ }
+ }
+
+ void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const;
+ void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const;
+
+ void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel,
+ unsigned &CurByte, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+};
+
+} // end anonymous namespace
+
+
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx) {
+ return new MBlazeMCCodeEmitter(TM, Ctx);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO) const {
+ if (MO.isReg())
+ return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable(0);
+ }
+ return 0;
+}
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const {
+ int32_t val = (int32_t)imm.getImm();
+ if (val > 32767 || val < -32768) {
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte((val >> 24) & 0xFF, CurByte, OS);
+ EmitRawByte((val >> 16) & 0xFF, CurByte, OS);
+ }
+}
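+// Note: after EmitByte's bit reversal, the 0x0D/0x00 pair above encodes the
+// IMM opcode (0x2C); the two raw bytes that follow carry the upper 16 bits of
+// the immediate, and the lower 16 bits are left to the instruction word that
+// is emitted afterwards.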
+
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const {
+ switch (MI.getOpcode()) {
+ default: break;
+
+ case MBlaze::ADDIK32:
+ case MBlaze::ORI32:
+ case MBlaze::BRLID32:
+ EmitByte(0x0D, CurByte, OS);
+ EmitByte(0x00, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ EmitRawByte(0, CurByte, OS);
+ }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getNumOperands() > opNo && "Not enough operands for instruction");
+
+ MCOperand oper = MI.getOperand(opNo);
+
+ if (oper.isImm()) {
+ EmitIMM(oper, CurByte, OS);
+ } else if (oper.isExpr()) {
+ MCFixupKind FixupKind;
+ switch (MI.getOpcode()) {
+ default:
+ FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ case MBlaze::ORI32:
+ case MBlaze::ADDIK32:
+ case MBlaze::BRLID32:
+ FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+ Fixups.push_back(MCFixup::Create(0,oper.getExpr(),FixupKind));
+ break;
+ }
+ }
+}
+
+
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Emit an IMM instruction if the instruction we are encoding requires it
+ EmitIMM(MI,CurByte,OS);
+
+ switch ((TSFlags & MBlazeII::FormMask)) {
+ default: break;
+ case MBlazeII::FPseudo:
+ // Pseudo instructions don't get encoded.
+ return;
+ case MBlazeII::FRRI:
+ EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRIR:
+ EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FCRI:
+ EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+ break;
+ case MBlazeII::FRCI:
+    EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+    break;
+ case MBlazeII::FCCI:
+ EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+ break;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted
+ unsigned Value = getBinaryCodeForInstr(MI);
+ EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 0000000..a7e400b
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+ switch (MO.getTargetFlags()) {
+ default: llvm_unreachable("Unknown target flag on GV operand");
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+ SmallString<256> Name;
+ raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+ << Printer.getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ // Create a symbol for the name.
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+ switch (MO.getTargetFlags()) {
+ default:
+ assert(0 && "Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+
+ case 0: break;
+ }
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default: llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ bool ignored;
+ APFloat FVal = MO.getFPImm()->getValueAPF();
+ FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+ APInt IVal = FVal.bitcastToAPInt();
+ uint64_t Val = *IVal.getRawData();
+ MCOp = MCOperand::CreateImm(Val);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
index b81a306..92196f2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- ARMMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM_MCINSTLOWER_H
-#define ARM_MCINSTLOWER_H
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
#include "llvm/Support/Compiler.h"
@@ -23,32 +23,26 @@ namespace llvm {
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
- //class ARMSubtarget;
-
-/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
+
+ /// MBlazeMCInstLower - This class is used to lower a MachineInstr
+ /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
MCContext &Ctx;
Mangler &Mang;
- AsmPrinter &Printer;
- //const ARMSubtarget &getSubtarget() const;
+ AsmPrinter &Printer;
public:
- ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+ MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
: Ctx(ctx), Mang(mang), Printer(printer) {}
-
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- //MCSymbol *GetPICBaseSymbol() const;
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-/*
-private:
- MachineModuleInfoMachO &getMachOMMI() const;
- */
+ MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
};
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
index 1f956c1..df39509 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -14,6 +14,7 @@
#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
#define MBLAZE_MACHINE_FUNCTION_INFO_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,20 +27,14 @@ namespace llvm {
class MBlazeFunctionInfo : public MachineFunctionInfo {
private:
- /// Holds for each function where on the stack the Frame Pointer must be
+ /// Holds for each function where on the stack the Frame Pointer must be
/// saved. This is used on Prologue and Epilogue to emit FP save/restore
int FPStackOffset;
- /// Holds for each function where on the stack the Return Address must be
+ /// Holds for each function where on the stack the Return Address must be
/// saved. This is used on Prologue and Epilogue to emit RA save/restore
int RAStackOffset;
- /// At each function entry a special bitmask directive must be emitted
- /// to help in debugging CPU callee saved registers. It needs a negative
- /// offset from the final stack size and its higher register location on
- /// the stack.
- int CPUTopSavedRegOff;
-
/// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset
struct MBlazeFIHolder {
@@ -50,25 +45,30 @@ private:
: FI(FrameIndex), SPOffset(StackPointerOffset) {}
};
- /// When PIC is used the GP must be saved on the stack on the function
- /// prologue and must be reloaded from this stack location after every
- /// call. A reference to its stack location and frame index must be kept
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
/// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
MBlazeFIHolder GPHolder;
/// On LowerFormalArguments the stack size is unknown, so the Stack
- /// Pointer Offset calculation of "not in register arguments" must be
- /// postponed to emitPrologue.
+ /// Pointer Offset calculation of "not in register arguments" must be
+ /// postponed to emitPrologue.
SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
bool HasLoadArgs;
- // When VarArgs, we must write registers back to caller stack, preserving
- // on register arguments. Since the stack size is unknown on
+ // When VarArgs, we must write registers back to caller stack, preserving
+ // on register arguments. Since the stack size is unknown on
// LowerFormalArguments, the Stack Pointer Offset calculation must be
- // postponed to emitPrologue.
+ // postponed to emitPrologue.
SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
bool HasStoreVarArgs;
+ // When determining the final stack layout some of the frame indexes may
+ // be replaced by new frame indexes that reside in the caller's stack
+ // frame. The replacements are recorded in this structure.
+ DenseMap<int,int> FIReplacements;
+
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
@@ -82,11 +82,15 @@ private:
// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// LiveInFI - keeps track of the frame indexes in a caller's stack
+ /// frame that are live into a function.
+ SmallVector<int, 16> LiveInFI;
+
public:
- MBlazeFunctionInfo(MachineFunction& MF)
- : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
- GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
- SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0)
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+ VarArgsFrameIndex(0), LiveInFI()
{}
int getFPStackOffset() const { return FPStackOffset; }
@@ -95,9 +99,6 @@ public:
int getRAStackOffset() const { return RAStackOffset; }
void setRAStackOffset(int Off) { RAStackOffset = Off; }
- int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
- void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
-
int getGPStackOffset() const { return GPHolder.SPOffset; }
int getGPFI() const { return GPHolder.FI; }
void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
@@ -105,12 +106,38 @@ public:
bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
bool hasLoadArgs() const { return HasLoadArgs; }
- bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLiveIn(int FI) {
+ LiveInFI.push_back(FI);
+ }
+
+ bool isLiveIn(int FI) {
+ for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+ if (FI == LiveInFI[i]) return true;
+
+ return false;
+ }
+
+ const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+ void recordReplacement(int OFI, int NFI) {
+ FIReplacements.insert(std::make_pair(OFI,NFI));
+ }
+
+ bool hasReplacement(int OFI) const {
+ return FIReplacements.find(OFI) != FIReplacements.end();
+ }
+
+ int getReplacement(int OFI) const {
+ return FIReplacements.lookup(OFI);
+ }
void recordLoadArgsFI(int FI, int SPOffset) {
if (!HasLoadArgs) HasLoadArgs=true;
FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
}
+
void recordStoreVarArgsFI(int FI, int SPOffset) {
if (!HasStoreVarArgs) HasStoreVarArgs=true;
FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
@@ -118,13 +145,14 @@ public:
void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
if (!hasLoadArgs()) return;
- for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
- MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
}
+
void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
- if (!hasStoreVarArgs()) return;
- for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
- MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset);
}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
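
The FIReplacements map and accessors added above let later frame-layout code redirect a frame index to a slot in the caller's frame. A minimal usage sketch, assuming a hypothetical pass holding a pointer FI to this MBlazeFunctionInfo:

  // Hypothetical helper: return the possibly remapped frame index.
  static int remapFrameIndex(const MBlazeFunctionInfo *FI, int OldFI) {
    if (FI->hasReplacement(OldFI))      // a caller-frame slot was recorded
      return FI->getReplacement(OldFI);
    return OldFI;                       // no replacement, keep the index
  }
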
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 22b6a30..fa9140d 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mblaze-reg-info"
+#define DEBUG_TYPE "mblaze-frame-info"
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -48,38 +48,62 @@ MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
switch (RegEnum) {
- case MBlaze::R0 : case MBlaze::F0 : return 0;
- case MBlaze::R1 : case MBlaze::F1 : return 1;
- case MBlaze::R2 : case MBlaze::F2 : return 2;
- case MBlaze::R3 : case MBlaze::F3 : return 3;
- case MBlaze::R4 : case MBlaze::F4 : return 4;
- case MBlaze::R5 : case MBlaze::F5 : return 5;
- case MBlaze::R6 : case MBlaze::F6 : return 6;
- case MBlaze::R7 : case MBlaze::F7 : return 7;
- case MBlaze::R8 : case MBlaze::F8 : return 8;
- case MBlaze::R9 : case MBlaze::F9 : return 9;
- case MBlaze::R10 : case MBlaze::F10 : return 10;
- case MBlaze::R11 : case MBlaze::F11 : return 11;
- case MBlaze::R12 : case MBlaze::F12 : return 12;
- case MBlaze::R13 : case MBlaze::F13 : return 13;
- case MBlaze::R14 : case MBlaze::F14 : return 14;
- case MBlaze::R15 : case MBlaze::F15 : return 15;
- case MBlaze::R16 : case MBlaze::F16 : return 16;
- case MBlaze::R17 : case MBlaze::F17 : return 17;
- case MBlaze::R18 : case MBlaze::F18 : return 18;
- case MBlaze::R19 : case MBlaze::F19 : return 19;
- case MBlaze::R20 : case MBlaze::F20 : return 20;
- case MBlaze::R21 : case MBlaze::F21 : return 21;
- case MBlaze::R22 : case MBlaze::F22 : return 22;
- case MBlaze::R23 : case MBlaze::F23 : return 23;
- case MBlaze::R24 : case MBlaze::F24 : return 24;
- case MBlaze::R25 : case MBlaze::F25 : return 25;
- case MBlaze::R26 : case MBlaze::F26 : return 26;
- case MBlaze::R27 : case MBlaze::F27 : return 27;
- case MBlaze::R28 : case MBlaze::F28 : return 28;
- case MBlaze::R29 : case MBlaze::F29 : return 29;
- case MBlaze::R30 : case MBlaze::F30 : return 30;
- case MBlaze::R31 : case MBlaze::F31 : return 31;
+ case MBlaze::R0 : return 0;
+ case MBlaze::R1 : return 1;
+ case MBlaze::R2 : return 2;
+ case MBlaze::R3 : return 3;
+ case MBlaze::R4 : return 4;
+ case MBlaze::R5 : return 5;
+ case MBlaze::R6 : return 6;
+ case MBlaze::R7 : return 7;
+ case MBlaze::R8 : return 8;
+ case MBlaze::R9 : return 9;
+ case MBlaze::R10 : return 10;
+ case MBlaze::R11 : return 11;
+ case MBlaze::R12 : return 12;
+ case MBlaze::R13 : return 13;
+ case MBlaze::R14 : return 14;
+ case MBlaze::R15 : return 15;
+ case MBlaze::R16 : return 16;
+ case MBlaze::R17 : return 17;
+ case MBlaze::R18 : return 18;
+ case MBlaze::R19 : return 19;
+ case MBlaze::R20 : return 20;
+ case MBlaze::R21 : return 21;
+ case MBlaze::R22 : return 22;
+ case MBlaze::R23 : return 23;
+ case MBlaze::R24 : return 24;
+ case MBlaze::R25 : return 25;
+ case MBlaze::R26 : return 26;
+ case MBlaze::R27 : return 27;
+ case MBlaze::R28 : return 28;
+ case MBlaze::R29 : return 29;
+ case MBlaze::R30 : return 30;
+ case MBlaze::R31 : return 31;
+ case MBlaze::RPC : return 0x0000;
+ case MBlaze::RMSR : return 0x0001;
+ case MBlaze::REAR : return 0x0003;
+ case MBlaze::RESR : return 0x0005;
+ case MBlaze::RFSR : return 0x0007;
+ case MBlaze::RBTR : return 0x000B;
+ case MBlaze::REDR : return 0x000D;
+ case MBlaze::RPID : return 0x1000;
+ case MBlaze::RZPR : return 0x1001;
+ case MBlaze::RTLBX : return 0x1002;
+ case MBlaze::RTLBLO : return 0x1003;
+ case MBlaze::RTLBHI : return 0x1004;
+ case MBlaze::RPVR0 : return 0x2000;
+ case MBlaze::RPVR1 : return 0x2001;
+ case MBlaze::RPVR2 : return 0x2002;
+ case MBlaze::RPVR3 : return 0x2003;
+ case MBlaze::RPVR4 : return 0x2004;
+ case MBlaze::RPVR5 : return 0x2005;
+ case MBlaze::RPVR6 : return 0x2006;
+ case MBlaze::RPVR7 : return 0x2007;
+ case MBlaze::RPVR8 : return 0x2008;
+ case MBlaze::RPVR9 : return 0x2009;
+ case MBlaze::RPVR10 : return 0x200A;
+ case MBlaze::RPVR11 : return 0x200B;
default: llvm_unreachable("Unknown register number!");
}
return 0; // Not reached
@@ -126,6 +150,37 @@ unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) {
return 0; // Not reached
}
+unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : return MBlaze::RPC;
+ case 0x0001 : return MBlaze::RMSR;
+ case 0x0003 : return MBlaze::REAR;
+ case 0x0005 : return MBlaze::RESR;
+ case 0x0007 : return MBlaze::RFSR;
+ case 0x000B : return MBlaze::RBTR;
+ case 0x000D : return MBlaze::REDR;
+ case 0x1000 : return MBlaze::RPID;
+ case 0x1001 : return MBlaze::RZPR;
+ case 0x1002 : return MBlaze::RTLBX;
+ case 0x1003 : return MBlaze::RTLBLO;
+ case 0x1004 : return MBlaze::RTLBHI;
+ case 0x2000 : return MBlaze::RPVR0;
+ case 0x2001 : return MBlaze::RPVR1;
+ case 0x2002 : return MBlaze::RPVR2;
+ case 0x2003 : return MBlaze::RPVR3;
+ case 0x2004 : return MBlaze::RPVR4;
+ case 0x2005 : return MBlaze::RPVR5;
+ case 0x2006 : return MBlaze::RPVR6;
+ case 0x2007 : return MBlaze::RPVR7;
+ case 0x2008 : return MBlaze::RPVR8;
+ case 0x2009 : return MBlaze::RPVR9;
+ case 0x200A : return MBlaze::RPVR10;
+ case 0x200B : return MBlaze::RPVR11;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
unsigned MBlazeRegisterInfo::getPICCallReg() {
return MBlaze::R20;
}
@@ -164,77 +219,40 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-//===----------------------------------------------------------------------===//
-//
-// Stack Frame Processing methods
-// +----------------------------+
-//
-// The stack is allocated decrementing the stack pointer on
-// the first instruction of a function prologue. Once decremented,
-// all stack references are are done through a positive offset
-// from the stack/frame pointer, so the stack is considered
-// to grow up.
-//
-//===----------------------------------------------------------------------===//
-
-void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
-
- // See the description at MicroBlazeMachineFunction.h
- int TopCPUSavedRegOff = -1;
-
- // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
- // be saved in this CPU Area there is the need. This whole Area must
- // be aligned to the default Stack Alignment requirements.
- unsigned StackOffset = MFI->getStackSize();
- unsigned RegSize = 4;
-
- // Replace the dummy '0' SPOffset by the negative offsets, as explained on
- // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
- // the approach done by calculateFrameObjectOffsets to the stack frame.
- MBlazeFI->adjustLoadArgsFI(MFI);
- MBlazeFI->adjustStoreVarArgsFI(MFI);
-
- if (hasFP(MF)) {
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
- StackOffset);
- MBlazeFI->setFPStackOffset(StackOffset);
- TopCPUSavedRegOff = StackOffset;
- StackOffset += RegSize;
- }
-
- if (MFI->adjustsStack()) {
- MBlazeFI->setRAStackOffset(0);
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
- StackOffset);
- TopCPUSavedRegOff = StackOffset;
- StackOffset += RegSize;
- }
-
- // Update frame info
- MFI->setStackSize(StackOffset);
-
- // Recalculate the final tops offset. The final values must be '0'
- // if there isn't a callee saved register for CPU or FPU, otherwise
- // a negative offset is needed.
- if (TopCPUSavedRegOff >= 0)
- MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
+// This function eliminates ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions
void MBlazeRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
+ // When the call frame is not reserved, turn the adjcallstackdown
+ // instruction into 'addik r1, r1, -<amt>' and the adjcallstackup
+ // instruction into 'addik r1, r1, <amt>'.
+ MachineInstr *Old = I;
+ int Amount = Old->getOperand(0).getImm() + 4;
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = TFI->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+ New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
// Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
MBB.erase(I);
}
@@ -247,6 +265,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -257,117 +276,34 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned oi = i == 2 ? 1 : 2;
- DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
- errs() << "<--------->\n" << MI);
+ DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ dbgs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
- int stackSize = MF.getFrameInfo()->getStackSize();
- int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int stackSize = MFI->getStackSize();
+ int spOffset = MFI->getObjectOffset(FrameIndex);
- DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ dbgs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
- << "stackSize : " << stackSize << "\n");
+ << "stackSize : " << stackSize << "\n"
+ << "isFixed : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+ << "isLiveIn : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+ << "isSpill : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+ << "\n" );
// as explained on LowerFormalArguments, detect negative offsets
// and adjust SPOffsets considering the final stack size.
- int Offset = (spOffset < 0) ? (stackSize - spOffset) : (spOffset + 4);
- Offset += MI.getOperand(oi).getImm();
+ int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+ Offset += MI.getOperand(oi).getImm();
- DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+ DEBUG(dbgs() << "Offset : " << Offset << "\n" << "<--------->\n");
MI.getOperand(oi).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
}
void MBlazeRegisterInfo::
-emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the right frame order for MBlaze.
- adjustMBlazeStackFrame(MF);
-
- // Get the number of bytes to allocate from the FrameInfo.
- unsigned StackSize = MFI->getStackSize();
-
- // No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
- if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
-
- int FPOffset = MBlazeFI->getFPStackOffset();
- int RAOffset = MBlazeFI->getRAStackOffset();
-
- // Adjust stack : addi R1, R1, -imm
- BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDI), MBlaze::R1)
- .addReg(MBlaze::R1).addImm(-StackSize);
-
- // Save the return address only if the function isnt a leaf one.
- // swi R15, R1, stack_loc
- if (MFI->adjustsStack()) {
- BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
- .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
- }
-
- // if framepointer enabled, save it and set it
- // to point to the stack pointer
- if (hasFP(MF)) {
- // swi R19, R1, stack_loc
- BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
- .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);
-
- // add R19, R1, R0
- BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
- .addReg(MBlaze::R1).addReg(MBlaze::R0);
- }
-}
-
-void MBlazeRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
- DebugLoc dl = MBBI->getDebugLoc();
-
- // Get the FI's where RA and FP are saved.
- int FPOffset = MBlazeFI->getFPStackOffset();
- int RAOffset = MBlazeFI->getRAStackOffset();
-
- // if framepointer enabled, restore it and restore the
- // stack pointer
- if (hasFP(MF)) {
- // add R1, R19, R0
- BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
- .addReg(MBlaze::R19).addReg(MBlaze::R0);
-
- // lwi R19, R1, stack_loc
- BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
- .addImm(FPOffset).addReg(MBlaze::R1);
- }
-
- // Restore the return address only if the function isnt a leaf one.
- // lwi R15, R1, stack_loc
- if (MFI->adjustsStack()) {
- BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
- .addImm(RAOffset).addReg(MBlaze::R1);
- }
-
- // Get the number of bytes from FrameInfo
- int StackSize = (int) MFI->getStackSize();
- if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
-
- // adjust stack.
- // addi R1, R1, imm
- if (StackSize) {
- BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
- .addReg(MBlaze::R1).addImm(StackSize);
- }
-}
-
-
-void MBlazeRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -381,7 +317,9 @@ unsigned MBlazeRegisterInfo::getRARegister() const {
}
unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
}
unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
@@ -394,9 +332,8 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
return 0;
}
-int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("What is the dwarf register number");
- return -1;
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0);
}
#include "MBlazeGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 1e1fde1..839536d 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -25,8 +25,8 @@ class TargetInstrInfo;
class Type;
namespace MBlaze {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
/// MBlazeRegisterInfo.td file.
enum SubregIndex {
SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
@@ -36,7 +36,7 @@ namespace MBlaze {
struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
const MBlazeSubtarget &Subtarget;
const TargetInstrInfo &TII;
-
+
MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
const TargetInstrInfo &tii);
@@ -44,20 +44,16 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
/// MBlaze::RA, return the number that it corresponds to (e.g. 31).
static unsigned getRegisterNumbering(unsigned RegEnum);
static unsigned getRegisterFromNumbering(unsigned RegEnum);
+ static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
/// Get PIC indirect call register
static unsigned getPICCallReg();
- /// Adjust the MBlaze stack frame.
- void adjustMBlazeStackFrame(MachineFunction &MF) const;
-
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -68,9 +64,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
/// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -79,11 +72,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
unsigned getEHExceptionRegister() const;
unsigned getEHHandlerRegister() const;
- /// targetHandlesStackFrameRounding - Returns true if the target is
- /// responsible for rounding up the stack frame (probably at emitPrologue
- /// time).
- bool targetHandlesStackFrameRounding() const { return true; }
-
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
};
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 5e93510..fbefb22 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -17,15 +17,20 @@ class MBlazeReg<string n> : Register<n> {
let Namespace = "MBlaze";
}
-// MBlaze CPU Registers
+// Special purpose registers are identified by 15-bit register numbers
+class MBlazeSReg<string n> : Register<n> {
+ field bits<15> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze general purpose registers
class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
let Num = num;
}
-// MBlaze 32-bit (aliased) FPU Registers
-class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
+// MBlaze special purpose registers
+class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> {
let Num = num;
- let Aliases = aliases;
}
//===----------------------------------------------------------------------===//
@@ -33,7 +38,6 @@ class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
//===----------------------------------------------------------------------===//
let Namespace = "MBlaze" in {
-
// General Purpose Registers
def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
@@ -68,46 +72,43 @@ let Namespace = "MBlaze" in {
def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
- /// MBlaze Single point precision FPU Registers
- def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>;
- def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>;
- def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>;
- def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>;
- def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>;
- def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>;
- def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>;
- def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>;
- def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>;
- def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>;
- def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>;
- def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>;
- def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>;
- def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>;
- def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>;
- def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>;
- def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>;
- def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>;
- def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
- def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
- def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
- def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
- def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
- def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
- def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
- def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
- def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
- def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
- def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
- def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
- def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
- def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+ // Special Purpose Registers
+ def RPC : MBlazeSPRReg<0x0000, "rpc">, DwarfRegNum<[32]>;
+ def RMSR : MBlazeSPRReg<0x0001, "rmsr">, DwarfRegNum<[33]>;
+ def REAR : MBlazeSPRReg<0x0003, "rear">, DwarfRegNum<[34]>;
+ def RESR : MBlazeSPRReg<0x0005, "resr">, DwarfRegNum<[35]>;
+ def RFSR : MBlazeSPRReg<0x0007, "rfsr">, DwarfRegNum<[36]>;
+ def RBTR : MBlazeSPRReg<0x000B, "rbtr">, DwarfRegNum<[37]>;
+ def REDR : MBlazeSPRReg<0x000D, "redr">, DwarfRegNum<[38]>;
+ def RPID : MBlazeSPRReg<0x1000, "rpid">, DwarfRegNum<[39]>;
+ def RZPR : MBlazeSPRReg<0x1001, "rzpr">, DwarfRegNum<[40]>;
+ def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
+ def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+ def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[44]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[45]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[46]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[47]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[48]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[49]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[50]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[51]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[52]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[53]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>;
+
+ // The carry bit. On the MicroBlaze this is really bit 29 of the
+ // MSR register but this is the only bit of that register that we
+ // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">, DwarfRegNum<[33]>;
}
//===----------------------------------------------------------------------===//
// Register Classes
//===----------------------------------------------------------------------===//
-def CPURegs : RegisterClass<"MBlaze", [i32], 32,
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32,
[
// Return Values and Arguments
R3, R4, R5, R6, R7, R8, R9, R10,
@@ -135,46 +136,55 @@ def CPURegs : RegisterClass<"MBlaze", [i32], 32,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- CPURegsClass::iterator
- CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
// The last 10 registers on the list above are reserved
return end()-10;
}
}];
}
-def FGR32 : RegisterClass<"MBlaze", [f32], 32,
+def SPR : RegisterClass<"MBlaze", [i32], 32,
[
- // Return Values and Arguments
- F3, F4, F5, F6, F7, F8, F9, F10,
-
- // Not preserved across procedure calls
- F11, F12,
-
- // Callee save
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,
-
// Reserved
- F0, // Always zero
- F1, // The stack pointer
- F2, // Read-only small data area anchor
- F13, // Read-write small data area anchor
- F14, // Return address for interrupts
- F15, // Return address for sub-routines
- F16, // Return address for trap
- F17, // Return address for exceptions
- F18, // Reserved for assembler
- F19 // The frame pointer
+ RPC,
+ RMSR,
+ REAR,
+ RESR,
+ RFSR,
+ RBTR,
+ REDR,
+ RPID,
+ RZPR,
+ RTLBX,
+ RTLBLO,
+ RTLBHI,
+ RPVR0,
+ RPVR1,
+ RPVR2,
+ RPVR3,
+ RPVR4,
+ RPVR5,
+ RPVR6,
+ RPVR7,
+ RPVR8,
+ RPVR9,
+ RPVR10,
+ RPVR11
]>
{
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- FGR32Class::iterator
- FGR32Class::allocation_order_end(const MachineFunction &MF) const {
- // The last 10 registers on the list above are reserved
- return end()-10;
+ SPRClass::iterator
+ SPRClass::allocation_order_end(const MachineFunction &MF) const {
+ // None of the special purpose registers are allocatable.
+ return end()-24;
}
}];
}
+
+def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> {
+ let CopyCost = -1;
+}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
new file mode 100644
index 0000000..c298eda
--- /dev/null
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
@@ -0,0 +1,47 @@
+//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MBlaze target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZERELOCATIONS_H
+#define MBLAZERELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace MBlaze {
+ enum RelocationType {
+ /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ /// reloc_picrel_word - PIC base relative relocation, add the relocated
+ /// value to the value already in memory, after we adjust it for where the
+ /// PIC base is.
+ reloc_picrel_word = 1,
+
+ /// reloc_absolute_word - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_word = 2,
+
+ /// reloc_absolute_word_sext - absolute relocation, just add the relocated
+ /// value to the value already in memory. In object files, it represents a
+ /// value which must be sign-extended when resolving the relocation.
+ reloc_absolute_word_sext = 3,
+
+ /// reloc_absolute_dword - absolute relocation, just add the relocated
+ /// value to the value already in memory.
+ reloc_absolute_dword = 4
+ };
+ }
+}
+
+#endif
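
Consumers of these relocation kinds usually only need to tell PC-relative fixups from the rest. A hypothetical helper, assuming MBlazeRelocations.h is included:

  // Sketch only: reloc_pcrel_word is the sole PC-relative kind defined here.
  static bool isPCRelativeReloc(unsigned RelocTy) {
    switch (RelocTy) {
    case MBlaze::reloc_pcrel_word:
      return true;  // adjusted for the current PC before being applied
    default:
      return false; // PIC-base-relative and absolute kinds
    }
  }
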
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
index 4a65542..ac4d98c 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
@@ -14,7 +14,7 @@ def ALU : FuncUnit;
def IMULDIV : FuncUnit;
//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for MBlaze
+// Instruction Itinerary classes used for MBlaze
//===----------------------------------------------------------------------===//
def IIAlu : InstrItinClass;
def IILoad : InstrItinClass;
@@ -41,7 +41,7 @@ def IIPseudo : InstrItinClass;
// MBlaze Generic instruction itineraries.
//===----------------------------------------------------------------------===//
def MBlazeGenericItineraries : ProcessorItineraries<
- [ALU, IMULDIV], [
+ [ALU, IMULDIV], [], [
InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 4252953..cd949e1 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -15,13 +15,62 @@
#include "MBlazeMCAsmInfo.h"
#include "MBlazeTargetMachine.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ case Triple::Darwin:
+ llvm_unreachable("MBlaze does not support Darwin MACH-O format");
+ return NULL;
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ llvm_unreachable("MBlaze does not support Windows COFF format");
+ return NULL;
+ default:
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll,
+ NoExecStack);
+ }
+}
+
+
extern "C" void LLVMInitializeMBlazeTarget() {
// Register the target.
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
- RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget);
+
+ // Register the target asm info.
+ RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
+ llvm::createMBlazeMCCodeEmitter);
+
+ // Register the asm backend
+ TargetRegistry::RegisterAsmBackend(TheMBlazeTarget,
+ createMBlazeAsmBackend);
+
+ // Register the object streamer
+ TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget,
+ createMCStreamer);
+
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -35,11 +84,10 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
const std::string &FS):
LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
- DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-"
- "f64:32:32-v64:32:32-v128:32:32-n32"),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
- FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this), TSInfo(*this) {
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) {
if (getRelocationModel() == Reloc::Default) {
setRelocationModel(Reloc::Static);
}
@@ -50,8 +98,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
PM.add(createMBlazeISelDag(*this));
return false;
}
@@ -59,8 +107,8 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
-bool MBlazeTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
PM.add(createMBlazeDelaySlotFillerPass(*this));
return true;
}
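
LLVMInitializeMBlazeTarget above is the single entry point that makes the target machine, MCAsmInfo factory, code emitter, asm backend, and ELF streamer reachable through the registry. A hypothetical client only needs to call it once before any lookup:

  extern "C" void LLVMInitializeMBlazeTarget();

  // Sketch of tool setup: run the initializer before TargetRegistry lookups.
  static void initMBlazeBackend() {
    LLVMInitializeMBlazeTarget();
  }
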
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
index 6a57e58..45ad078 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -19,21 +19,25 @@
#include "MBlazeISelLowering.h"
#include "MBlazeSelectionDAGInfo.h"
#include "MBlazeIntrinsicInfo.h"
+#include "MBlazeFrameLowering.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class formatted_raw_ostream;
class MBlazeTargetMachine : public LLVMTargetMachine {
- MBlazeSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- MBlazeInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
- MBlazeTargetLowering TLInfo;
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ MBlazeFrameLowering FrameLowering;
+ MBlazeTargetLowering TLInfo;
MBlazeSelectionDAGInfo TSInfo;
- MBlazeIntrinsicInfo IntrinsicInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ MBlazeELFWriterInfo ELFWriterInfo;
public:
MBlazeTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -41,8 +45,8 @@ namespace llvm {
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const
- { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
virtual const MBlazeSubtarget *getSubtargetImpl() const
{ return &Subtarget; }
@@ -62,12 +66,13 @@ namespace llvm {
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
- // Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
+ virtual const MBlazeELFWriterInfo *getELFWriterInfo() const {
+ return &ELFWriterInfo;
+ }
- virtual bool addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
+ virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 05c01ef..abd1b0b 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
void MBlazeTargetObjectFile::
@@ -23,13 +24,13 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
SmallDataSection =
- getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getDataRel());
SmallBSSSection =
- getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
}
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
index 20e7702..c313722 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -18,10 +18,9 @@ namespace llvm {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
public:
-
+
void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
/// IsGlobalInSmallSection - Return true if this global address should be
/// placed into small data/bss section.
bool IsGlobalInSmallSection(const GlobalValue *GV,
@@ -29,8 +28,8 @@ namespace llvm {
SectionKind Kind) const;
bool IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM) const;
-
+ const TargetMachine &TM) const;
+
const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind,
Mangler *Mang,
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..f5458d5
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+ MSP430InstPrinter.cpp
+ )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index c15d408..e10d4fe 100644
--- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -13,7 +13,6 @@
#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
-#include "MSP430InstrInfo.h"
#include "MSP430InstPrinter.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,9 +23,7 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
#include "MSP430GenAsmWriter.inc"
-#undef MachineInstr
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
printInstruction(MI, O);
diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index f0e1ce2..f0e1ce2 100644
--- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
new file mode 100644
index 0000000..a5293ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td
index 0f08e3d..5cc5e6e 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430.td
@@ -52,6 +52,7 @@ def MSP430InstrInfo : InstrInfo;
def MSP430InstPrinter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 56f72bb..a1a7f44 100644
--- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -15,10 +15,10 @@
#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
-#include "MSP430InstPrinter.h"
#include "MSP430MCAsmInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 0000000..c99f4ab
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the FPW register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ const MSP430InstrInfo &TII =
+ *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+ if (CSSize) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(CSSize);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+// FIXME: Can we eliminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
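
Both emitPrologue and emitEpilogue above derive the SPW adjustment from the frame size in MachineFrameInfo. A sketch mirroring that computation, with purely illustrative numbers:

  #include <cstdint>

  // With a frame pointer, 2 bytes are reserved for the saved FPW and the
  // callee-saved pushes are excluded from the explicit SPW adjustment.
  static uint64_t spwAdjustment(uint64_t StackSize, unsigned CSSize,
                                bool HasFP) {
    uint64_t FrameSize = HasFP ? StackSize - 2 : StackSize;
    return FrameSize - CSSize; // e.g. StackSize=12, CSSize=4, FP -> 6
  }
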
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 0000000..b636827
--- /dev/null
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+ const MSP430Subtarget &STI;
+
+public:
+ explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
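
For context on the constructor call in the header above: the three arguments are TargetFrameLowering's stack direction, stack alignment, and local-area offset. The restatement below only adds descriptive argument comments; the parameter names are paraphrased for illustration, not copied from the LLVM header.

    // Same call as in MSP430FrameLowering's initializer list, annotated:
    TargetFrameLowering(TargetFrameLowering::StackGrowsDown, // stack grows toward lower addresses
                        /*StackAlignment=*/2,                // 16-bit (2-byte) slot alignment
                        /*LocalAreaOffset=*/-2);             // locals start 2 bytes below the incoming SP
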
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 3395e9f..5430d43 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -60,15 +60,6 @@ namespace {
return GV != 0 || CP != 0 || ES != 0 || JT != -1;
}
- bool hasBaseReg() const {
- return Base.Reg.getNode() != 0;
- }
-
- void setBaseReg(SDValue Reg) {
- BaseType = RegBase;
- Base.Reg = Reg;
- }
-
void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
if (BaseType == RegBase && Base.Reg.getNode() != 0) {
@@ -129,7 +120,7 @@ namespace {
SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16);
- bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
};
} // end anonymous namespace
@@ -254,7 +245,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
SDValue &Base, SDValue &Disp) {
MSP430ISelAddressMode AM;
@@ -272,7 +263,7 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
AM.Base.Reg;
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
MVT::i16, AM.Disp,
0/*AM.SymbolFlags*/);
else if (AM.CP)
@@ -298,7 +289,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ if (!SelectAddr(Op, Op0, Op1))
return true;
break;
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index a1703a3..30ef4f5 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -366,7 +366,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > 2) {
errs() << "LowerFormalArguments Unhandled argument type: "
- << VA.getLocVT().getSimpleVT().SimpleTy
+ << EVT(VA.getLocVT()).getEVTString()
<< "\n";
}
// Create the frame index object for this incoming parameter...
@@ -376,7 +376,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -507,8 +507,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(),
- VA.getLocMemOffset(), false, false, 0));
+ MachinePointerInfo(),false, false, 0));
}
}
@@ -537,7 +536,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
@@ -748,7 +747,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
}
TargetCC = DAG.getConstant(TCC, MVT::i8);
- return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
}
@@ -837,7 +836,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SR;
} else {
SDValue Zero = DAG.getConstant(0, VT);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SmallVector<SDValue, 4> Ops;
Ops.push_back(One);
Ops.push_back(Zero);
@@ -859,7 +858,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue TargetCC;
SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SmallVector<SDValue, 4> Ops;
Ops.push_back(TrueV);
Ops.push_back(FalseV);
@@ -914,13 +913,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0, false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
}
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -934,7 +933,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
MSP430::FPW, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index bfab844..424df13 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -40,8 +40,9 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -68,8 +69,9 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad, 0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -99,48 +101,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool
-MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
- MFI->setCalleeSavedFrameSize(CSI.size() * 2);
-
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
- .addReg(Reg, RegState::Kill);
- }
- return true;
-}
-
-bool
-MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
-
- return true;
-}
-
unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 49ccc03..e885cd3 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -66,15 +66,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
// Branch folding goodness
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index 8792b22..59cb598 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,28 +40,28 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
def MSP430callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def MSP430callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
-def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
- [SDNPHasChain, SDNPInFlag]>;
+ [SDNPHasChain, SDNPInGlue]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
- [SDNPInFlag]>;
+ [SDNPInGlue]>;
def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index d1d9a11..d1d9a11 100644
--- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
diff --git a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
index e937696..e937696 100644
--- a/contrib/llvm/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.h
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 3c3fa73..1da6d8d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -33,11 +33,12 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const TargetInstrInfo &tii)
: MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
TM(tm), TII(tii) {
- StackAlign = TM.getFrameInfo()->getStackAlignment();
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
}
const unsigned*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
const Function* F = MF->getFunction();
static const unsigned CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
@@ -62,7 +63,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
0
};
- if (hasFP(*MF))
+ if (TFI->hasFP(*MF))
return (F->getCallingConv() == CallingConv::MSP430_INTR ?
CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
else
@@ -73,6 +74,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
// Mark 4 special registers as reserved.
Reserved.set(MSP430::PCW);
@@ -81,7 +83,7 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(MSP430::CGW);
// Mark frame pointer as reserved if needed.
- if (hasFP(MF))
+ if (TFI->hasFP(MF))
Reserved.set(MSP430::FPW);
return Reserved;
@@ -92,23 +94,12 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
return &MSP430::GR16RegClass;
}
-
-bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- return (DisableFramePointerElim(MF) ||
- MF.getFrameInfo()->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
-bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
void MSP430RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub SPW, <amt>' and the
// adjcallstackdown instruction into 'add SPW, <amt>'
@@ -172,6 +163,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -180,13 +172,13 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
- unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
// Skip the saved PC
Offset += 2;
- if (!hasFP(MF))
+ if (!TFI->hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
Offset += 2; // Skip the saved FPW
@@ -224,8 +216,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void
MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
// Create a frame entry for the FPW register that must be saved.
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
(void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
@@ -233,144 +227,14 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
}
-
-void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
-
- uint64_t NumBytes = 0;
- if (hasFP(MF)) {
- // Calculate required stack adjustment
- uint64_t FrameSize = StackSize - 2;
- NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
-
- // Get the offset of the stack slot for the EBP register... which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
-
- // Save FPW into the appropriate stack slot...
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
- .addReg(MSP430::FPW, RegState::Kill);
-
- // Update FPW with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
- .addReg(MSP430::SPW);
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(MSP430::FPW);
-
- } else
- NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
-
- // Skip the callee-saved push instructions.
- while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
- ++MBBI;
-
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- if (NumBytes) { // adjust stack pointer: SPW -= numbytes
- // If there is an SUB16ri of SPW immediately before this instruction, merge
- // the two.
- //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
- // If there is an ADD16ri or SUB16ri of SPW immediately after this
- // instruction, merge the two instructions.
- // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
-
- if (NumBytes) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
- // The SRW implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
- }
-}
-
-void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc DL = MBBI->getDebugLoc();
-
- switch (RetOpcode) {
- case MSP430::RET:
- case MSP430::RETI: break; // These are ok
- default:
- llvm_unreachable("Can only insert epilog into returning blocks");
- }
-
- // Get the number of bytes to allocate from the FrameInfo
- uint64_t StackSize = MFI->getStackSize();
- unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
- uint64_t NumBytes = 0;
-
- if (hasFP(MF)) {
- // Calculate required stack adjustment
- uint64_t FrameSize = StackSize - 2;
- NumBytes = FrameSize - CSSize;
-
- // pop FPW.
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
- } else
- NumBytes = StackSize - CSSize;
-
- // Skip the callee-saved pop instructions.
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
- break;
- --MBBI;
- }
-
- DL = MBBI->getDebugLoc();
-
- // If there is an ADD16ri or SUB16ri of SPW immediately before this
- // instruction, merge the two instructions.
- //if (NumBytes || MFI->hasVarSizedObjects())
- // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
- if (MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
- if (CSSize) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::SUB16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(CSSize);
- // The SRW implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
- } else {
- // adjust stack pointer back: SPW += numbytes
- if (NumBytes) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
- .addReg(MSP430::SPW).addImm(NumBytes);
- // The SRW implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
- }
-}
-
unsigned MSP430RegisterInfo::getRARegister() const {
return MSP430::PCW;
}
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW;
}
int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index 4d2795b..56744fa 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -39,9 +39,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
- bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -49,9 +46,6 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
// Debug information queries.
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index f8aec66..ab7b59b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -79,10 +79,10 @@ def GR8 : RegisterClass<"MSP430", [i8], 8,
GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
// Depending on whether the function uses frame pointer or not, last 5 or 4
// registers on the list above are reserved
- if (RI->hasFP(MF))
+ if (TFI->hasFP(MF))
return end()-5;
else
return end()-4;
@@ -106,10 +106,10 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
// Depending on whether the function uses frame pointer or not, last 5 or 4
// registers on the list above are reserved
- if (RI->hasFP(MF))
+ if (TFI->hasFP(MF))
return end()-5;
else
return end()-4;
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 99877c8..fba9536 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,13 +28,13 @@ extern "C" void LLVMInitializeMSP430Target() {
MSP430TargetMachine::MSP430TargetMachine(const Target &T,
const std::string &TT,
- const std::string &FS) :
- LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
- // FIXME: Check TargetData string.
- DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
+ // FIXME: Check TargetData string.
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(Subtarget) { }
bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
index b93edfd..cee3b04 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.h
@@ -17,11 +17,12 @@
#include "MSP430InstrInfo.h"
#include "MSP430ISelLowering.h"
+#include "MSP430FrameLowering.h"
#include "MSP430SelectionDAGInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430Subtarget.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -34,16 +35,15 @@ class MSP430TargetMachine : public LLVMTargetMachine {
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
MSP430SelectionDAGInfo TSInfo;
-
- // MSP430 does not have any call stack frame, therefore not having
- // any MSP430 specific FrameInfo class.
- TargetFrameInfo FrameInfo;
+ MSP430FrameLowering FrameLowering;
public:
MSP430TargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout;}
virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
index 49efe75..46c687b 100644
--- a/contrib/llvm/lib/Target/Mangler.cpp
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -224,16 +224,6 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
}
}
-/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
-/// and the specified global variable's name. If the global variable doesn't
-/// have a name, this fills in a unique name for the global.
-std::string Mangler::getNameWithPrefix(const GlobalValue *GV,
- bool isImplicitlyPrivate) {
- SmallString<64> Buf;
- getNameWithPrefix(Buf, GV, isImplicitlyPrivate);
- return std::string(Buf.begin(), Buf.end());
-}
-
/// getSymbol - Return the MCSymbol for the specified global value. This
/// symbol is the main label that is the address of the global.
MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index a51c377..3e6437b 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -36,19 +36,15 @@ def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
-def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
- "Mips1 ISA Support">;
-def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
- "Mips2 ISA Support">;
def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
"Enable o32 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
-def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
-def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
"Enable 'signext in register' instructions.">;
-def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
"Enable 'conditional move' instructions.">;
def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
"Enable 'multiply add/sub' instructions.">;
@@ -58,6 +54,16 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
"Enable 'byte/half swap' instructions.">;
def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
"Enable 'count leading bits' instructions.">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips1 ISA Support">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips2 ISA Support">;
+def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
+ "Mips32 ISA Support",
+ [FeatureCondMov, FeatureBitCount]>;
+def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
+ "Mips32r2", "Mips32r2 ISA Support",
+ [FeatureMips32, FeatureSEInReg]>;
//===----------------------------------------------------------------------===//
// Mips processors supported.
@@ -73,10 +79,12 @@ def : Proc<"r3000", [FeatureMips1]>;
def : Proc<"mips2", [FeatureMips2]>;
def : Proc<"r6000", [FeatureMips2]>;
-// Allegrex is a 32bit subset of r4000, both for interger and fp registers,
-// but much more similar to Mips2 than Mips3. It also contains some of
-// Mips32/Mips32r2 instructions and a custom vector fpu processor.
-def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+def : Proc<"4ke", [FeatureMips32r2]>;
+
+// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some of
+// Mips32/Mips32r2 instructions and a custom vector fpu processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
FeatureMinMax, FeatureSwap, FeatureBitCount]>;
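
A note on how the new feature chain composes (illustrative, summarizing the definitions above): FeatureMips32r2 implies FeatureMips32 and FeatureSEInReg, and FeatureMips32 in turn implies FeatureCondMov and FeatureBitCount, so selecting the new 4ke processor (for example via llc -mcpu=4ke) pulls in the whole set. Later hunks in this patch consume the chain through the generated subtarget predicates, e.g.:

    // Sketch: only pre-Mips32r2 cores need the rotate-right expansion
    // (see the MipsISelLowering.cpp hunk later in this patch).
    if (!Subtarget->isMips32r2())
      setOperationAction(ISD::ROTR, MVT::i32, Expand);
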
diff --git a/contrib/llvm/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 6660f6b..bd28a9b 100644
--- a/contrib/llvm/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -125,9 +125,10 @@ namespace {
// Create a bitmask with all callee saved registers for CPU or Floating Point
// registers. For CPU registers consider RA, GP and FP for saving if necessary.
void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
- const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
-
+
// CPU and FPU Saved Registers Bitmasks
unsigned int CPUBitmask = 0;
unsigned int FPUBitmask = 0;
@@ -145,13 +146,15 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
}
// Return Address and Frame registers must also be set in CPUBitmask.
- if (RI.hasFP(*MF))
+ // FIXME: Do we really need hasFP() call here? When no FP is present SP is
+ // just returned -- will it be ok?
+ if (TFI->hasFP(*MF))
CPUBitmask |= (1 << MipsRegisterInfo::
- getRegisterNumbering(RI.getFrameRegister(*MF)));
-
- if (MFI->adjustsStack())
+ getRegisterNumbering(RI->getFrameRegister(*MF)));
+
+ if (MFI->adjustsStack())
CPUBitmask |= (1 << MipsRegisterInfo::
- getRegisterNumbering(RI.getRARegister()));
+ getRegisterNumbering(RI->getRARegister()));
// Print CPUBitmask
O << "\t.mask \t"; printHex32(CPUBitmask, O);
@@ -270,12 +273,16 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch(MO.getTargetFlags()) {
case MipsII::MO_GPREL: O << "%gp_rel("; break;
case MipsII::MO_GOT_CALL: O << "%call16("; break;
- case MipsII::MO_GOT:
- if (MI->getOpcode() == Mips::LW)
+ case MipsII::MO_GOT: {
+ const MachineOperand &LastMO = MI->getOperand(opNum-1);
+ bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register
+ && LastMO.getReg() == Mips::GP;
+ if (MI->getOpcode() == Mips::LW || LastMOIsGP)
O << "%got(";
else
O << "%lo(";
break;
+ }
case MipsII::MO_ABS_HILO:
if (MI->getOpcode() == Mips::LUi)
O << "%hi(";
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 597ea0d..b44a0af 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -31,7 +31,7 @@ namespace {
const TargetInstrInfo *TII;
static char ID;
- Filler(TargetMachine &tm)
+ Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
@@ -55,17 +55,22 @@ namespace {
/// Currently, we fill delay slots with NOPs. We assume there is only one
/// delay slot per delayed instruction.
bool Filler::
-runOnMachineBasicBlock(MachineBasicBlock &MBB)
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
{
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ const TargetInstrDesc& Tid = I->getDesc();
+ if (Tid.hasDelaySlot() &&
+ (TM.getSubtarget<MipsSubtarget>().isMips1() ||
+ Tid.isCall() || Tid.isBranch() || Tid.isReturn())) {
MachineBasicBlock::iterator J = I;
++J;
BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
++FilledSlots;
Changed = true;
}
+ }
+
return Changed;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
new file mode 100644
index 0000000..87a097a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -0,0 +1,314 @@
+//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up! Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer with the stack size subtracted/added back
+// in the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: stacksize + 4*(num_arg-1)
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area var there is something like: FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load of the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown in LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, in eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ unsigned StackAlign = getStackAlignment();
+ unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+ bool HasGP = MipsFI->needGPSaveRestore();
+
+ // Min and Max CSI FrameIndex.
+ int MinCSFI = -1, MaxCSFI = -1;
+
+ // See the description at MipsMachineFunction.h
+ int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
+
+ // Replace the dummy '0' SPOffset with the negative offsets, as explained in
+ // LowerFormalArguments. Leaving '0' for a while is necessary to avoid the
+ // approach calculateFrameObjectOffsets would otherwise apply to the stack frame.
+ MipsFI->adjustLoadArgsFI(MFI);
+ MipsFI->adjustStoreVarArgsFI(MFI);
+
+ // It happens that the default stack frame allocation order does not directly
+ // map to the convention used for mips. So we must fix it. We move the callee
+ // save register slots after the local variables area, as described in the
+ // stack frame above.
+ unsigned CalleeSavedAreaSize = 0;
+ if (!CSI.empty()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
+ }
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+
+ unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
+ : (STI.isABI_O32() ? 16 : 0);
+
+ // Adjust local variables. They should come on the stack right
+ // after the arguments.
+ int LastOffsetFI = -1;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFI && i <= MaxCSFI)
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Offset =
+ StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
+ if (LastOffsetFI == -1)
+ LastOffsetFI = i;
+ if (Offset > MFI->getObjectOffset(LastOffsetFI))
+ LastOffsetFI = i;
+ MFI->setObjectOffset(i, Offset);
+ }
+
+ // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
+ // be saved in this CPU Area. This whole area must be aligned to the
+ // default Stack Alignment requirements.
+ if (LastOffsetFI >= 0)
+ StackOffset = MFI->getObjectOffset(LastOffsetFI)+
+ MFI->getObjectSize(LastOffsetFI);
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!Mips::CPURegsRegisterClass->contains(Reg))
+ break;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+
+ // Stack locations for FP and RA. If only one of them is used,
+ // the space must be allocated for both, otherwise no space at all.
+ if (hasFP(MF) || MFI->adjustsStack()) {
+ // FP stack location
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ MipsFI->setFPStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+
+ // SP stack location
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ MipsFI->setRAStackOffset(StackOffset);
+ StackOffset += RegSize;
+
+ if (MFI->adjustsStack())
+ TopCPUSavedRegOff += RegSize;
+ }
+
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Adjust FPU Callee Saved Registers Area. This Area must be
+ // aligned to the default Stack Alignment requirements.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
+ continue;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopFPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Update frame info
+ MFI->setStackSize(StackOffset);
+
+ // Recalculate the final top offsets. The final values must be '0'
+ // if there isn't a callee saved register for CPU or FPU, otherwise
+ // a negative offset is needed.
+ if (TopCPUSavedRegOff >= 0)
+ MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+
+ if (TopFPUSavedRegOff >= 0)
+ MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
+}
+
+void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+
+ // Get the right frame order for Mips.
+ adjustMipsStackFrame(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+ // TODO: check need from GP here.
+ if (isPIC && STI.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
+ .addReg(RegInfo->getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+ // Adjust stack : addi sp, sp, (-imm)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-StackSize);
+
+ // Save the return address only if the function isn't a leaf one.
+ // sw $ra, stack_loc($sp)
+ if (MFI->adjustsStack()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // if framepointer enabled, save it and set it
+ // to point to the stack pointer
+ if (hasFP(MF)) {
+ // sw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+ // move $fp, $sp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+ }
+
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+ .addImm(MipsFI->getGPStackOffset());
+}
+
+void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const MipsInstrInfo &TII =
+ *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
+ // if framepointer enabled, restore it and restore the
+ // stack pointer
+ if (hasFP(MF)) {
+ // move $sp, $fp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+
+ // lw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
+ .addImm(FPOffset).addReg(Mips::SP);
+ }
+
+ // Restore the return address only if the function isn't a leaf one.
+ // lw $ra, stack_loc($sp)
+ if (MFI->adjustsStack()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
+ .addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // adjust stack : insert addi sp, sp, (imm)
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(NumBytes);
+ }
+}
+
+void MipsFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ const MipsRegisterInfo *RegInfo =
+ static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ RegInfo->processFunctionBeforeFrameFinalized(MF);
+}
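
adjustMipsStackFrame in the file above repeatedly rounds the running StackOffset up to the stack alignment with ((StackOffset+StackAlign-1)/StackAlign*StackAlign). A quick worked instance, with values chosen purely for illustration:

    // Round-up-to-alignment idiom used in adjustMipsStackFrame:
    unsigned StackAlign  = 8;
    unsigned StackOffset = 13;
    StackOffset = ((StackOffset + StackAlign - 1) / StackAlign) * StackAlign;
    // StackOffset is now 16; an already aligned value (e.g. 16) is unchanged.
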
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
new file mode 100644
index 0000000..a8426c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -0,0 +1,48 @@
+//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MipsSubtarget;
+
+class MipsFrameLowering : public TargetFrameLowering {
+protected:
+ const MipsSubtarget &STI;
+
+public:
+ explicit MipsFrameLowering(const MipsSubtarget &sti)
+ // FIXME: Is this correct at all?
+ : TargetFrameLowering(StackGrowsUp, 8, 0), STI(sti) {
+ }
+
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index a47cf7b..755e04d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -84,8 +84,7 @@ private:
SDNode *Select(SDNode *N);
// Complex Pattern.
- bool SelectAddr(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &Offset);
+ bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
SDNode *SelectLoadFp64(SDNode *N);
SDNode *SelectStoreFp64(SDNode *N);
@@ -110,8 +109,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
-SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
-{
+SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -193,7 +191,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue Offset0, Offset1, Base;
- if (!SelectAddr(N, N1, Offset0, Base) ||
+ if (!SelectAddr(N1, Offset0, Base) ||
N1.getValueType() != MVT::i32)
return NULL;
@@ -257,7 +255,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDValue Offset0, Offset1, Base;
- if (!SelectAddr(N, N2, Offset0, Base) ||
+ if (!SelectAddr(N2, Offset0, Base) ||
N1.getValueType() != MVT::f64 ||
N2.getValueType() != MVT::i32)
return NULL;
@@ -327,7 +325,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
case ISD::SUBE:
case ISD::ADDE: {
SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); Opc=Opc;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
(Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
"(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
@@ -351,7 +349,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
SDValue(Carry,0), RHS);
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag,
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
LHS, SDValue(AddCarry,0));
}
@@ -369,11 +367,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
else
Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
- SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
+ SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
SDValue InFlag = SDValue(MulDiv, 0);
SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
- MVT::Flag, InFlag);
+ MVT::Glue, InFlag);
InFlag = SDValue(Lo,1);
SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
@@ -388,6 +386,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Special Muls
case ISD::MUL:
+ if (Subtarget.isMips32())
+ break;
case ISD::MULHS:
case ISD::MULHU: {
SDValue MulOp1 = Node->getOperand(0);
@@ -395,7 +395,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
- MVT::Flag, MulOp1, MulOp2);
+ MVT::Glue, MulOp1, MulOp2);
SDValue InFlag = SDValue(MulNode, 0);
@@ -421,7 +421,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
MOp = Mips::MFHI;
}
- SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2);
+ SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
SDValue InFlag = SDValue(Node, 0);
return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag);
@@ -474,7 +474,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDValue InFlag;
// Skip the incomming flag if present
- if (Node->getOperand(LastOpNum).getValueType() == MVT::Flag)
+ if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue)
LastOpNum--;
if ( (isa<GlobalAddressSDNode>(Callee)) ||
@@ -496,7 +496,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag);
// Map the JmpLink operands to JALR
- SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32));
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index b0b99ba..1d7a1c0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -41,12 +41,15 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Lo : return "MipsISD::Lo";
case MipsISD::GPRel : return "MipsISD::GPRel";
case MipsISD::Ret : return "MipsISD::Ret";
- case MipsISD::CMov : return "MipsISD::CMov";
case MipsISD::SelectCC : return "MipsISD::SelectCC";
case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
case MipsISD::FPBrcond : return "MipsISD::FPBrcond";
case MipsISD::FPCmp : return "MipsISD::FPCmp";
case MipsISD::FPRound : return "MipsISD::FPRound";
+ case MipsISD::MAdd : return "MipsISD::MAdd";
+ case MipsISD::MAddu : return "MipsISD::MAddu";
+ case MipsISD::MSub : return "MipsISD::MSub";
+ case MipsISD::MSubu : return "MipsISD::MSubu";
default : return NULL;
}
}
@@ -57,7 +60,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
Subtarget = &TM.getSubtarget<MipsSubtarget>();
// Mips does not have i1 type, so use i32 for
- // setcc operations results (slt, sgt, ...).
+ // setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
// Set up the register classes
@@ -69,7 +72,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->isFP64bit())
addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
- // Load extented operations for i1 types must be promoted
+ // Load extented operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -78,9 +81,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- // Used by legalize types to correctly generate the setcc result.
- // Without this, every float setcc comes with a AND/OR with the result,
- // we don't want this, since the fpcmp result goes to a flag register,
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with a AND/OR with the result,
+ // we don't want this, since the fpcmp result goes to a flag register,
// which is used implicitly by brcond and select operations.
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
@@ -100,8 +103,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
- // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
- // with operands comming from setcc fp comparions. This is necessary since
+ // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
+ // with operands comming from setcc fp comparions. This is necessary since
// the result from these setcc are in a flag registers (FCR31).
setOperationAction(ISD::AND, MVT::i32, Custom);
setOperationAction(ISD::OR, MVT::i32, Custom);
@@ -116,7 +119,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
+ if (!Subtarget->isMips32r2())
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
@@ -152,6 +158,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->hasSwap())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setTargetDAGCombine(ISD::ADDE);
+ setTargetDAGCombine(ISD::SUBE);
+
setStackPointerRegisterToSaveRestore(Mips::SP);
computeRegisterProperties();
}
@@ -165,10 +174,198 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
return 2;
}
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+ // ADDENode's second operand must be a flag output of an ADDC node in order
+ // for the matching to be successful.
+ SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+ if (ADDCNode->getOpcode() != ISD::ADDC)
+ return false;
+
+ SDValue MultHi = ADDENode->getOperand(0);
+ SDValue MultLo = ADDCNode->getOperand(0);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+ // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+ // If there exist users other than ADDENode or ADDCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MADD instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = ADDENode->getDebugLoc();
+
+ // create MipsMAdd(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ ADDCNode->getOperand(1),// Lo0
+ ADDENode->getOperand(1));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MAdd);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of adde and addc here
+ if (!SDValue(ADDCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+ if (!SDValue(ADDENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+ return true;
+}
+
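A minimal sketch of the kind of source code whose legalized DAG produces the (addc, adde) pair described above, assuming a 32-bit MIPS target; the function and value names are illustrative only:

    // 64-bit accumulate: the i64 multiply becomes [SU]MUL_LOHI and the i64 add
    // becomes ADDC/ADDE during legalization, which SelectMadd can then fold
    // into a single madd/maddu that updates HI/LO in place.
    long long MulAcc(long long Acc, int A, int B) {
      return Acc + (long long)A * (long long)B;
    }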
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+// (addc Lo0, multLo), (sube Hi0, multHi),
+// where,
+// multHi/Lo: product of multiplication
+// Lo0: initial value of Lo register
+// Hi0: initial value of Hi register
+// Returns true if pattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+ // SUBENode's second operand must be a flag output of an SUBC node in order
+ // for the matching to be successful.
+ SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+ if (SUBCNode->getOpcode() != ISD::SUBC)
+ return false;
+
+ SDValue MultHi = SUBENode->getOperand(1);
+ SDValue MultLo = SUBCNode->getOperand(1);
+ SDNode* MultNode = MultHi.getNode();
+ unsigned MultOpc = MultHi.getOpcode();
+
+ // MultHi and MultLo must be generated by the same node,
+ if (MultLo.getNode() != MultNode)
+ return false;
+
+ // and it must be a multiplication.
+ if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+ return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+ // respectively.
+ if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+ return false;
+
+ // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+ // of the values of MultNode, in which case MultNode will be removed in later
+ // phases.
+  // If there are users other than SUBENode or SUBCNode, this function returns
+ // here, which will result in MultNode being mapped to a single MULT
+ // instruction node rather than a pair of MULT and MSUB instructions being
+ // produced.
+ if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+ return false;
+
+ SDValue Chain = CurDAG->getEntryNode();
+ DebugLoc dl = SUBENode->getDebugLoc();
+
+  // create MipsMSub(u) node
+ MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
+
+ SDValue MSub = CurDAG->getNode(MultOpc, dl,
+ MVT::Glue,
+ MultNode->getOperand(0),// Factor 0
+ MultNode->getOperand(1),// Factor 1
+ SUBCNode->getOperand(0),// Lo0
+ SUBENode->getOperand(0));// Hi0
+
+ // create CopyFromReg nodes
+ SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+ MSub);
+ SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+ Mips::HI, MVT::i32,
+ CopyFromLo.getValue(2));
+
+ // replace uses of sube and subc here
+ if (!SDValue(SUBCNode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo);
+
+ if (!SDValue(SUBENode, 0).use_empty())
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi);
+
+ return true;
+}
+
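The mirror case for SelectMsub, again only a sketch under the same assumptions:

    // 64-bit multiply-subtract: legalization turns the i64 subtraction into
    // SUBC/SUBE, matching the (subc Lo0, multLo), (sube Hi0, multHi) pattern
    // that SelectMsub folds into msub/msubu.
    long long MulSub(long long Acc, int A, int B) {
      return Acc - (long long)A * (long long)B;
    }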
+static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMadd(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ if (Subtarget->isMips32() && SelectMsub(N, &DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
+ const {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned opc = N->getOpcode();
+
+ switch (opc) {
+ default: break;
+ case ISD::ADDE:
+ return PerformADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::SUBE:
+ return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ }
+
+ return SDValue();
+}
+
SDValue MipsTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
- switch (Op.getOpcode())
+ switch (Op.getOpcode())
{
case ISD::AND: return LowerANDOR(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
@@ -194,7 +391,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// MachineFunction as a live in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
{
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -212,7 +409,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
return Mips::BRANCH_INVALID;
}
-
+
static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
switch(BC) {
default:
@@ -227,24 +424,24 @@ static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown fp condition code!");
- case ISD::SETEQ:
+ case ISD::SETEQ:
case ISD::SETOEQ: return Mips::FCOND_EQ;
case ISD::SETUNE: return Mips::FCOND_OGL;
- case ISD::SETLT:
+ case ISD::SETLT:
case ISD::SETOLT: return Mips::FCOND_OLT;
- case ISD::SETGT:
+ case ISD::SETGT:
case ISD::SETOGT: return Mips::FCOND_OGT;
- case ISD::SETLE:
- case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
case ISD::SETGE:
case ISD::SETOGE: return Mips::FCOND_OGE;
case ISD::SETULT: return Mips::FCOND_ULT;
- case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETULE: return Mips::FCOND_ULE;
case ISD::SETUGT: return Mips::FCOND_UGT;
case ISD::SETUGE: return Mips::FCOND_UGE;
- case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETUO: return Mips::FCOND_UN;
case ISD::SETO: return Mips::FCOND_OR;
- case ISD::SETNE:
+ case ISD::SETNE:
case ISD::SETONE: return Mips::FCOND_NEQ;
case ISD::SETUEQ: return Mips::FCOND_UEQ;
}
@@ -364,7 +561,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
// Emit the round instruction and bit convert to integer
SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32,
Src, CondReg.getValue(1));
- SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc);
+ SDValue BitCvt = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Trunc);
return BitCvt;
}
@@ -382,11 +579,11 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
// obtain the new stack size.
SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
- // The Sub result contains the new stack start address, so it
+ // The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub);
-
- // This node always has two return values: a new stack pointer
+
+ // This node always has two return values: a new stack pointer
// value and a chain
SDValue Ops[2] = { Sub, Chain };
return DAG.getMergeValues(Ops, 2, dl);
@@ -405,9 +602,9 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const
SDValue True = DAG.getConstant(1, MVT::i32);
SDValue False = DAG.getConstant(0, MVT::i32);
- SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
LHS, True, False, LHS.getOperand(2));
- SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
RHS, True, False, RHS.getOperand(2));
return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
@@ -416,7 +613,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
- // The first operand is the chain, the second is the condition, the third is
+ // The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
SDValue Dest = Op.getOperand(2);
@@ -424,55 +621,55 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
return Op;
-
+
SDValue CondRes = Op.getOperand(1);
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
- SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
- return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
Dest, CondRes);
}
SDValue MipsTargetLowering::
LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
- // The operands to this are the left and right operands to compare (ops #0,
- // and #1) and the condition code to compare them with (op #2) as a
+ // The operands to this are the left and right operands to compare (ops #0,
+ // and #1) and the condition code to compare them with (op #2) as a
// CondCodeSDNode.
- SDValue LHS = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
+
+ return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
}
SDValue MipsTargetLowering::
LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(0);
SDValue True = Op.getOperand(1);
SDValue False = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- // if the incomming condition comes from a integer compare, the select
- // operation must be SelectCC or a conditional move if the subtarget
+  // if the incoming condition comes from an integer compare, the select
+ // operation must be SelectCC or a conditional move if the subtarget
// supports it.
if (Cond.getOpcode() != MipsISD::FPCmp) {
if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
return Op;
- return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
+ return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
Cond, True, False);
}
// if the incomming condition comes from fpcmp, the select
// operation must use FPSelectCC.
SDValue CCNode = Cond.getOperand(2);
- return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
Cond, True, False, CCNode);
}
@@ -484,16 +681,16 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
SDVTList VTs = DAG.getVTList(MVT::i32);
-
+
MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering();
-
+
// %gp_rel relocation
- if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GPREL);
SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
}
// %hi/%lo relocation
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
@@ -505,8 +702,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
} else {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GOT);
- SDValue ResNode = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), GA, NULL, 0,
+ SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, 0);
// On functions and global targets not internal linked only
// a load from got/GP is necessary for PIC to work.
@@ -531,7 +728,7 @@ SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
@@ -546,7 +743,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue Ops[] = { JTI };
HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0,
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(),
false, false, 0);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
@@ -565,26 +763,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
// gp_rel relocation
- // FIXME: we should reference the constant pool using small data sections,
+ // FIXME: we should reference the constant pool using small data sections,
// but the asm printer currently doens't support this feature without
- // hacking it. This feature should come soon so we can uncomment the
+ // hacking it. This feature should come soon so we can uncomment the
// stuff below.
//if (IsInSmallSection(C->getType())) {
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
- // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
N->getOffset(), MipsII::MO_ABS_HILO);
SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
N->getOffset(), MipsII::MO_GOT);
- SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
- CP, NULL, 0, false, false, 0);
+ SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
+ CP, MachinePointerInfo::getConstantPool(),
+ false, false, 0);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
}
@@ -603,7 +802,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0,
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV),
false, false, 0);
}
@@ -614,23 +814,23 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
#include "MipsGenCallingConv.inc"
//===----------------------------------------------------------------------===//
-// TODO: Implement a generic logic using tblgen that can support this.
+// TODO: Implement generic logic using tblgen that can support this.
// Mips O32 ABI rules:
// ---
// i32 - Passed in A0, A1, A2, A3 and stack
-// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
// an argument. Otherwise, passed in A1, A2, A3 and stack.
-// f64 - Only passed in two aliased f32 registers if no int reg has been used
-// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
// not used, it must be shadowed. If only A3 is avaiable, shadow it and
// go to stack.
//===----------------------------------------------------------------------===//
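A rough illustration of how these rules play out for a few prototypes, using the usual O32 register names; this is a hedged sketch of the intended assignments, not output of the routine below:

    void f1(float a, float b);  // a -> $f12, b -> $f14: no int reg used yet.
    void f2(int a, double b);   // a -> A0; b -> the A2/A3 pair (A1 shadowed),
                                //   no FP register is used.
    void f3(double a, int b);   // a -> $f12/$f13; b -> A2 (A0/A1 shadowed).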
-static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const unsigned IntRegsSize=4, FloatRegsSize=2;
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
static const unsigned IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
@@ -642,9 +842,15 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
Mips::D6, Mips::D7
};
- unsigned Reg=0;
- unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
- bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0)));
+ unsigned Reg = 0;
+ static bool IntRegUsed = false;
+
+ // This must be the first arg of the call if no regs have been allocated.
+ // Initialize IntRegUsed in that case.
+ if (IntRegs[State.getFirstUnallocated(IntRegs, IntRegsSize)] == Mips::A0 &&
+ F32Regs[State.getFirstUnallocated(F32Regs, FloatRegsSize)] == Mips::F12 &&
+ F64Regs[State.getFirstUnallocated(F64Regs, FloatRegsSize)] == Mips::D6)
+ IntRegUsed = false;
// Promote i8 and i16
if (LocVT == MVT::i8 || LocVT == MVT::i16) {
@@ -657,30 +863,48 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
LocInfo = CCValAssign::AExt;
}
- if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) {
+ if (ValVT == MVT::i32) {
Reg = State.AllocateReg(IntRegs, IntRegsSize);
IntRegUsed = true;
- LocVT = MVT::i32;
- }
-
- if (ValVT.isFloatingPoint() && !IntRegUsed) {
- if (ValVT == MVT::f32)
- Reg = State.AllocateReg(F32Regs, FloatRegsSize);
- else
- Reg = State.AllocateReg(F64Regs, FloatRegsSize);
- }
+ } else if (ValVT == MVT::f32) {
+ // An int reg has to be marked allocated regardless of whether or not
+ // IntRegUsed is true.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
- if (ValVT == MVT::f64 && IntRegUsed) {
- if (UnallocIntReg != IntRegsSize) {
- // If we hit register A3 as the first not allocated, we must
- // mark it as allocated (shadow) and use the stack instead.
- if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3)))
- Reg = Mips::A2;
- for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg)
- State.AllocateReg(UnallocIntReg);
- }
- LocVT = MVT::i32;
- }
+ if (IntRegUsed) {
+ if (Reg) // Int reg is available
+ LocVT = MVT::i32;
+ } else {
+ unsigned FReg = State.AllocateReg(F32Regs, FloatRegsSize);
+ if (FReg) // F32 reg is available
+ Reg = FReg;
+ else if (Reg) // No F32 regs are available, but an int reg is available.
+ LocVT = MVT::i32;
+ }
+ } else if (ValVT == MVT::f64) {
+ // Int regs have to be marked allocated regardless of whether or not
+ // IntRegUsed is true.
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ else if (Reg == Mips::A3)
+ Reg = 0;
+ State.AllocateReg(IntRegs, IntRegsSize);
+
+ // At this point, Reg is A0, A2 or 0, and all the unavailable integer regs
+ // are marked as allocated.
+ if (IntRegUsed) {
+      if (Reg) // if int reg is available
+ LocVT = MVT::i32;
+ } else {
+ unsigned FReg = State.AllocateReg(F64Regs, FloatRegsSize);
+ if (FReg) // F64 reg is available.
+ Reg = FReg;
+ else if (Reg) // No F64 regs are available, but an int reg is available.
+ LocVT = MVT::i32;
+ }
+ } else
+ assert(false && "cannot handle this ValVT");
if (!Reg) {
unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
@@ -692,8 +916,8 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT,
return false; // CC must always match
}
-static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
+static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
static const unsigned IntRegsSize=4;
@@ -736,7 +960,7 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT,
IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) {
unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize);
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
- // Shadow the next register so it can be used
+ // Shadow the next register so it can be used
// later to get the other 32bit part.
State.AllocateReg(IntRegs, IntRegsSize);
return false;
@@ -786,13 +1010,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// To meet O32 ABI, Mips must always allocate 16 bytes on
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
- int VTsize = EVT(MVT::i32).getSizeInBits()/8;
+ int VTsize = MVT(MVT::i32).getSizeInBits()/8;
MFI->CreateFixedObject(VTsize, (VTsize*3), true);
- CCInfo.AnalyzeCallOperands(Outs,
+ CCInfo.AnalyzeCallOperands(Outs,
isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
} else
CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
-
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
@@ -801,7 +1025,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- // First/LastArgStackLoc contains the first/last
+ // First/LastArgStackLoc contains the first/last
// "at stack" argument location.
int LastArgStackLoc = 0;
unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
@@ -814,12 +1038,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
+ case CCValAssign::Full:
if (Subtarget->isABI_O32() && VA.isRegLoc()) {
if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
DAG.getConstant(0, getPointerTy()));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
@@ -827,7 +1051,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
continue;
- }
+ }
}
break;
case CCValAssign::SExt:
@@ -840,17 +1064,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
-
- // Arguments that can be passed on register must be kept at
+
+    // Arguments that can be passed in registers must be kept in the
// RegsToPass vector
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
-
+
// Register can't get to this point...
assert(VA.isMemLoc());
-
+
// Create the frame index object for this incoming parameter
// This guarantees that when allocating Local Area the firsts
// 16 bytes which are alwayes reserved won't be overwritten
@@ -861,50 +1085,51 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
- // emit ISD::STORE whichs stores the
+    // emit ISD::STORE which stores the
// parameter value to a stack Location
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes chained together with token
+ // Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
// The InFlag in necessary since all emited instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
// MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
+ // = Chain, Callee, Reg#1, Reg#2, ...
//
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
- // Add argument registers to the end of the list so that they are
+ // Add argument registers to the end of the list so that they are
// known live into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
@@ -916,17 +1141,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
- // Create a stack location to hold GP when PIC is used. This stack
- // location is used on function prologue to save GP and also after all
- // emited CALL's to restore GP.
+ // Create a stack location to hold GP when PIC is used. This stack
+ // location is used on function prologue to save GP and also after all
+  // emitted CALLs to restore GP.
if (IsPIC) {
- // Function can have an arbitrary number of calls, so
+ // Function can have an arbitrary number of calls, so
// hold the LastArgStackLoc with the biggest offset.
int FI;
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
if (LastArgStackLoc >= MipsFI->getGPStackOffset()) {
LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4);
- // Create the frame index only once. SPOffset here can be anything
+ // Create the frame index only once. SPOffset here can be anything
// (this will be fixed on processFunctionBeforeFrameFinalized)
if (MipsFI->getGPStackOffset() == -1) {
FI = MFI->CreateFixedObject(4, 0, true);
@@ -937,14 +1162,15 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Reload GP value.
FI = MipsFI->getGPFI();
- SDValue FIN = DAG.getFrameIndex(FI,getPointerTy());
- SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0,
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Chain = GPLoad.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
+ Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
GPLoad, SDValue(0,0));
InFlag = Chain.getValue(1);
- }
+ }
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -988,7 +1214,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//
-/// LowerFormalArguments - transform physical registers into virtual registers
+/// LowerFormalArguments - transform physical registers into virtual registers
/// and generate load operations for arguments places on the stack.
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
@@ -1018,7 +1244,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
ArgLocs, *DAG.getContext());
if (Subtarget->isABI_O32())
- CCInfo.AnalyzeFormalArguments(Ins,
+ CCInfo.AnalyzeFormalArguments(Ins,
isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
@@ -1037,22 +1263,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
TargetRegisterClass *RC = 0;
if (RegVT == MVT::i32)
- RC = Mips::CPURegsRegisterClass;
- else if (RegVT == MVT::f32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
RC = Mips::FGR32RegisterClass;
else if (RegVT == MVT::f64) {
- if (!Subtarget->isSingleFloat())
+ if (!Subtarget->isSingleFloat())
RC = Mips::AFGR64RegisterClass;
- } else
+ } else
llvm_unreachable("RegVT not supported by FormalArguments Lowering");
- // Transform the arguments stored on
+ // Transform the arguments stored on
// physical registers into virtual ones
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it has been passed promoted
- // to 32 bits. Insert an assert[sz]ext to capture this, then
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
if (VA.getLocInfo() != CCValAssign::Full) {
unsigned Opcode = 0;
@@ -1061,22 +1287,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
Opcode = ISD::AssertZext;
if (Opcode)
- ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
- // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
if (Subtarget->isABI_O32()) {
- if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
VA.getLocReg()+1, RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
- SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
- SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
- ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue);
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair);
}
}
@@ -1088,13 +1313,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// The last argument is not a register anymore
ArgRegEnd = 0;
-
- // The stack pointer offset is relative to the caller stack frame.
- // Since the real stack size is unknown here, a negative SPOffset
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
// is used so there's a way to adjust these offsets when the stack
- // size get known (on EliminateFrameIndex). A dummy SPOffset is
+      // size gets known (on EliminateFrameIndex). A dummy SPOffset is
// used instead of a direct negative address (which is recorded to
- // be used on emitPrologue) to avoid mis-calc of the first stack
+ // be used on emitPrologue) to avoid mis-calc of the first stack
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
@@ -1104,7 +1329,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -1124,11 +1350,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// To meet ABI, when VARARGS are passed on registers, the registers
// must have their values written to the caller stack frame. If the last
- // argument was placed in the stack, there's no need to save any register.
+  // argument was placed on the stack, there's no need to save any register.
if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) {
if (StackPtr.getNode() == 0)
StackPtr = DAG.getRegister(StackReg, getPointerTy());
-
+
// The last register argument that must be saved is Mips::A3
TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
unsigned StackLoc = ArgLocs.size()-1;
@@ -1140,7 +1366,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(4, 0, true);
MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
- OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
// Record the frame index of the first variable argument
@@ -1150,7 +1377,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
}
}
- // All stores are grouped in one node to allow the matching between
+ // All stores are grouped in one node to allow the matching between
// the size of Ins and InVals. This only happens when on varg functions
if (!OutChains.empty()) {
OutChains.push_back(Chain);
@@ -1183,7 +1410,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
- // If this is the first return lowered for this function, add
+ // If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
@@ -1198,7 +1425,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
OutVals[i], Flag);
// guarantee that all emitted copies are
@@ -1215,7 +1442,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
unsigned Reg = MipsFI->getSRetReturnReg();
- if (!Reg)
+ if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
@@ -1225,10 +1452,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// Return on Mips is always a "jr $ra"
if (Flag.getNode())
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
else // Return Void
- return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
Chain, DAG.getRegister(Mips::RA, MVT::i32));
}
@@ -1239,21 +1466,21 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
MipsTargetLowering::ConstraintType MipsTargetLowering::
-getConstraintType(const std::string &Constraint) const
+getConstraintType(const std::string &Constraint) const
{
- // Mips specific constrainy
+  // Mips-specific constraints
// GCC config/mips/constraints.md
//
- // 'd' : An address register. Equivalent to r
- // unless generating MIPS16 code.
- // 'y' : Equivalent to r; retained for
- // backwards compatibility.
- // 'f' : Floating Point registers.
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default : break;
- case 'd':
- case 'y':
+ case 'd':
+ case 'y':
case 'f':
return C_RegisterClass;
break;
@@ -1262,6 +1489,37 @@ getConstraintType(const std::string &Constraint) const
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+MipsTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
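For reference, a small sketch of the GCC-style inline asm these constraint letters come from; it only compiles when targeting MIPS, and the asm bodies are merely illustrative:

    int AddD(int a, int b) {
      int r;
      // 'd' requests a general-purpose (address) register, so integer
      // operands get CW_Register weight from the hook above.
      asm("addu %0, %1, %2" : "=d"(r) : "d"(a), "d"(b));
      return r;
    }

    float AddF(float a, float b) {
      float r;
      // 'f' requests a floating-point register; float operands match it.
      asm("add.s %0, %1, %2" : "=f"(r) : "f"(a), "f"(b));
      return r;
    }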
/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
/// return a list of registers that can be used to satisfy the constraint.
/// This should only be used for C_RegisterClass constraints.
@@ -1275,7 +1533,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
case 'f':
if (VT == MVT::f32)
return std::make_pair(0U, Mips::FGR32RegisterClass);
- if (VT == MVT::f64)
+ if (VT == MVT::f64)
if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
return std::make_pair(0U, Mips::AFGR64RegisterClass);
}
@@ -1293,15 +1551,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
if (Constraint.size() != 1)
return std::vector<unsigned>();
- switch (Constraint[0]) {
+ switch (Constraint[0]) {
default : break;
case 'r':
// GCC Mips Constraint Letters
- case 'd':
- case 'y':
- return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
- Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
- Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ case 'd':
+ case 'y':
+ return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+ Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
+ Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
Mips::T8, 0);
case 'f':
@@ -1313,15 +1571,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
Mips::F30, Mips::F31, 0);
else
- return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
- Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
+ return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
+ Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
Mips::F28, Mips::F30, 0);
}
- if (VT == MVT::f64)
+ if (VT == MVT::f64)
if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
- return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
- Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
+ return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
+ Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
Mips::D14, Mips::D15, 0);
}
return std::vector<unsigned>();
@@ -1336,5 +1594,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
+ if (Imm.isNegZero())
+ return false;
return Imm.isZero();
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 460747b..9d6b9f3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -40,9 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // Conditional Move
- CMov,
-
// Select CC Pseudo Instruction
SelectCC,
@@ -59,7 +56,13 @@ namespace llvm {
FPRound,
// Return
- Ret
+ Ret,
+
+ // MAdd/Sub nodes
+ MAdd,
+ MAddu,
+ MSub,
+ MSubu
};
}
@@ -83,6 +86,8 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
@@ -139,6 +144,11 @@ namespace llvm {
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index cff79966d..977e0df 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -32,7 +32,7 @@ def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
-def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>;
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
[SDNPHasChain]>;
def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 320c5b8..b70266a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -19,41 +19,53 @@ include "MipsInstrFormats.td"
def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
-def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
+def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
-def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
+ [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<2, 3>]>;
+
// Call
-def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
-// Hi and Lo nodes are used to handle global addresses. Used on
-// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
+// Hi and Lo nodes are used to handle global addresses. Used on
+// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
// static model. (nothing to do with Mips Registers Hi and Lo)
def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
// Return
-def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
- SDNPOptInFlag]>;
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInGlue]>;
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Select Condition Code
def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
-// Conditional Move
-def MipsCMov : SDNode<"MipsISD::CMov", SDT_MipsCMov>;
+// MAdd*/MSub* nodes
+def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
+def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
+ [SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
@@ -62,6 +74,8 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
def HasSwap : Predicate<"Subtarget.hasSwap()">;
def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+def IsMips32 : Predicate<"Subtarget.isMips32()">;
+def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -126,90 +140,66 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
let isCommutable = 1 in
class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
InstrItinClass itin>:
- FR< op,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
let isCommutable = 1 in
class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>:
- FR< op,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [], IIAlu>;
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
// Arithmetic 2 register operands
class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
- FI< op,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
- FI< op,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [], IIAlu>;
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>;
// Arithmetic Multiply ADD/SUB
-let rd=0 in
-class MArithR<bits<6> func, string instr_asm> :
- FR< 0x1c,
- func,
- (outs CPURegs:$rs),
- (ins CPURegs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"),
- [], IIImul>;
+let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
+class MArithR<bits<6> func, string instr_asm, SDNode op> :
+ FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>;
// Logical
class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
- FR< 0x00,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
- FI< op,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, uimm16:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
- FR< op,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
// Shifts
-let rt = 0 in
-class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
- FR< 0x00,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, shamt:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+ let rs = _rs;
+}
-class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
- FR< 0x00,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+ SDNode OpNode>:
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
+ let shamt = _shamt;
+}
// Load Upper Imediate
class LoadUpper<bits<6> op, string instr_asm>:
@@ -222,76 +212,55 @@ class LoadUpper<bits<6> op, string instr_asm>:
// Memory Load/Store
let canFoldAsLoad = 1, hasDelaySlot = 1 in
class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI< op,
- (outs CPURegs:$dst),
- (ins mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+ FI<op, (outs CPURegs:$dst), (ins mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
- FI< op,
- (outs),
- (ins CPURegs:$dst, mem:$addr),
- !strconcat(instr_asm, "\t$dst, $addr"),
- [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+ FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
// Conditional Branch
let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI< op,
- (outs),
- (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
- !strconcat(instr_asm, "\t$a, $b, $offset"),
- [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
- IIBranch>;
-
+ FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, "\t$a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ IIBranch>;
class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
- FI< op,
- (outs),
- (ins CPURegs:$src, brtarget:$offset),
- !strconcat(instr_asm, "\t$src, $offset"),
- [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
- IIBranch>;
+ FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
+ !strconcat(instr_asm, "\t$src, $offset"),
+ [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+ IIBranch>;
}
// SetCC
class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
PatFrag cond_op>:
- FR< op,
- func,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, CPURegs:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
- IIAlu>;
+ FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+ IIAlu>;
class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
Operand Od, PatLeaf imm_type>:
- FI< op,
- (outs CPURegs:$dst),
- (ins CPURegs:$b, Od:$c),
- !strconcat(instr_asm, "\t$dst, $b, $c"),
- [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
- IIAlu>;
+ FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+ IIAlu>;
// Unconditional branch
let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFJ<bits<6> op, string instr_asm>:
- FJ< op,
- (outs),
- (ins brtarget:$target),
- !strconcat(instr_asm, "\t$target"),
- [(br bb:$target)], IIBranch>;
+ FJ<op, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR< op,
- func,
- (outs),
- (ins CPURegs:$target),
- !strconcat(instr_asm, "\t$target"),
- [(brind CPURegs:$target)], IIBranch>;
+ FR<op, func, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>;
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1,
@@ -299,86 +268,64 @@ let isCall=1, hasDelaySlot=1,
Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
class JumpLink<bits<6> op, string instr_asm>:
- FJ< op,
- (outs),
- (ins calltarget:$target, variable_ops),
- !strconcat(instr_asm, "\t$target"),
- [(MipsJmpLink imm:$target)], IIBranch>;
+ FJ<op, (outs), (ins calltarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
+ IIBranch>;
let rd=31 in
class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
- FR< op,
- func,
- (outs),
- (ins CPURegs:$rs, variable_ops),
- !strconcat(instr_asm, "\t$rs"),
- [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+ FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>;
class BranchLink<string instr_asm>:
- FI< 0x1,
- (outs),
- (ins CPURegs:$rs, brtarget:$target, variable_ops),
- !strconcat(instr_asm, "\t$rs, $target"),
- [], IIBranch>;
+ FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>;
}
// Mul, Div
class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR< 0x00,
- func,
- (outs),
- (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$a, $b"),
- [], itin>;
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
// Move from Hi/Lo
class MoveFromLOHI<bits<6> func, string instr_asm>:
- FR< 0x00,
- func,
- (outs CPURegs:$dst),
- (ins),
- !strconcat(instr_asm, "\t$dst"),
- [], IIHiLo>;
+ FR<0x00, func, (outs CPURegs:$dst), (ins),
+ !strconcat(instr_asm, "\t$dst"), [], IIHiLo>;
class MoveToLOHI<bits<6> func, string instr_asm>:
- FR< 0x00,
- func,
- (outs),
- (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$src"),
- [], IIHiLo>;
+ FR<0x00, func, (outs), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$src"), [], IIHiLo>;
class EffectiveAddress<string instr_asm> :
- FI<0x09,
- (outs CPURegs:$dst),
- (ins mem:$addr),
- instr_asm,
- [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+ FI<0x09, (outs CPURegs:$dst), (ins mem:$addr),
+ instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
-class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>:
- FR< 0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>;
+class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
+ FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"), pattern, IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
// Sign Extend in Register.
class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
- FR< 0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+ FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
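SignExtInReg matches the generic sext_inreg node: re-interpret only the low bits of a register as signed (the SEB/SEH instructions cover the 8- and 16-bit cases on cores that have them). A small sketch of the value being computed; the function name and the branch-free masking idiom are chosen only for illustration, not taken from the tree:

```c++
#include <cstdint>

// Sign-extend the low `Bits` bits of a 32-bit value, as sext_inreg asks for.
static int32_t signExtendInReg(uint32_t x, unsigned Bits) {
  uint32_t SignBit = 1u << (Bits - 1);
  if (Bits < 32)
    x &= (1u << Bits) - 1;                     // keep only the low Bits bits
  return static_cast<int32_t>((x ^ SignBit) - SignBit);
}
```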
// Byte Swap
class ByteSwap<bits<6> func, string instr_asm>:
- FR< 0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
- !strconcat(instr_asm, "\t$dst, $src"),
- [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+ FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
// Conditional Move
class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
- FR< 0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
- CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
- [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T,
- CPURegs:$cond, MovCode))], NoItinerary>;
+ FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+ [], NoItinerary>;
//===----------------------------------------------------------------------===//
// Pseudo instructions
@@ -408,13 +355,13 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
-// The supported Mips ISAs don't have any instruction close to the SELECT_CC
+// The supported Mips ISAs don't have any instruction close to the SELECT_CC
// operation. The solution is to create a Mips pseudo SELECT_CC instruction
-// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
+// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
// replace it with real supported nodes in EmitInstrWithCustomInserter
-// The supported Mips ISAs don't have any instruction close to the SELECT_CC
let usesCustomInserter = 1 in {
- class PseudoSelCC<RegisterClass RC, string asmstr>:
- MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
+ class PseudoSelCC<RegisterClass RC, string asmstr>:
+ MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
[(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
}
@@ -451,12 +398,18 @@ def XOR : LogicR<0x26, "xor", xor>;
def NOR : LogicNOR<0x00, 0x27, "nor">;
/// Shift Instructions
-def SLL : LogicR_shift_imm<0x00, "sll", shl>;
-def SRL : LogicR_shift_imm<0x02, "srl", srl>;
-def SRA : LogicR_shift_imm<0x03, "sra", sra>;
-def SLLV : LogicR_shift_reg<0x04, "sllv", shl>;
-def SRLV : LogicR_shift_reg<0x06, "srlv", srl>;
-def SRAV : LogicR_shift_reg<0x07, "srav", sra>;
+def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
+def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
+def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
+def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
+def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
+def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+
+// Rotate Instructions
+let Predicates = [IsMips32r2] in {
+ def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+}
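The rotate definitions above reuse the shift opcodes and are only selected when the IsMips32r2 predicate holds; older cores have to synthesize the same result from two shifts and an OR. As a rough reference for what the rotr node computes (the function name is illustrative only):

```c++
#include <cstdint>

// Rotate a 32-bit value right by n bits; this is what ROTR/ROTRV implement.
// Pre-r2 cores would get the equivalent srl/sll/or sequence instead.
static uint32_t rotateRight(uint32_t x, unsigned n) {
  n &= 31;                                    // only the low 5 bits matter
  return (x >> n) | (x << ((32 - n) & 31));   // the (... & 31) keeps n == 0 defined
}
```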
/// Load and Store Instructions
def LB : LoadM<0x20, "lb", sextloadi8>;
@@ -493,7 +446,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
"jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
-/// Multiply and Divide Instructions.
+/// Multiply and Divide Instructions.
let Defs = [HI, LO] in {
def MULT : MulDiv<0x18, "mult", IIImul>;
def MULTu : MulDiv<0x19, "multu", IIImul>;
@@ -521,10 +474,10 @@ let Predicates = [HasSEInReg] in {
}
/// Count Leading
-let Predicates = [HasBitCount] in {
- let rt = 0 in
- def CLZ : CountLeading<0b010110, "clz", ctlz>;
-}
+def CLZ : CountLeading<0b100000, "clz",
+ [(set CPURegs:$dst, (ctlz CPURegs:$src))]>;
+def CLO : CountLeading<0b100001, "clo",
+ [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>;
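The patterns above spell out the relationship between the two instructions: CLZ matches ctlz directly, while CLO is matched as a count-leading-zeros of the complemented input. A minimal reference model with illustrative names; on the hardware both instructions additionally require the rt field to equal rd, which is what the `let rt = rd` in the CountLeading class earlier in this file's diff encodes:

```c++
#include <cstdint>

// Number of leading zero bits in a 32-bit value (32 when x == 0).
static unsigned countLeadingZeros(uint32_t x) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit != 0 && (x & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

// Counting leading ones is just ctlz of the complement, as the CLO pattern says.
static unsigned countLeadingOnes(uint32_t x) {
  return countLeadingZeros(~x);
}
```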
/// Byte Swap
let Predicates = [HasSwap] in {
@@ -551,15 +504,15 @@ let addr=0 in
// can be matched. It's similar to Sparc LEA_ADDRi
def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
-// MADD*/MSUB* are not part of MipsI either.
-//def MADD : MArithR<0x00, "madd">;
-//def MADDU : MArithR<0x01, "maddu">;
-//def MSUB : MArithR<0x04, "msub">;
-//def MSUBU : MArithR<0x05, "msubu">;
+// MADD*/MSUB*
+def MADD : MArithR<0, "madd", MipsMAdd>;
+def MADDU : MArithR<1, "maddu", MipsMAddu>;
+def MSUB : MArithR<4, "msub", MipsMSub>;
+def MSUBU : MArithR<5, "msubu", MipsMSubu>;
// MUL is a assembly macro in the current used ISAs. In recent ISA's
// it is a real instruction.
-//def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>;
+def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>, Requires<[IsMips32]>;
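Unlike MUL, the newly enabled MADD/MADDU/MSUB/MSUBU accumulate into the HI/LO pair rather than into a GPR, which is why they are modeled with the MipsMAdd/MipsMSub nodes instead of a plain mul. A rough sketch of the signed multiply-add, assuming the usual MIPS32 view of {HI,LO} as one 64-bit accumulator (the helper name is illustrative):

```c++
#include <cstdint>

// madd: {HI,LO} += (int64)a * (int64)b  (signed multiply-accumulate).
static void madd(uint64_t &HiLo, int32_t a, int32_t b) {
  HiLo += static_cast<uint64_t>(static_cast<int64_t>(a) * static_cast<int64_t>(b));
}
```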
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -605,9 +558,9 @@ def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
// gp_rel relocs
-def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
(ADDiu CPURegs:$gp, tglobaladdr:$in)>;
-def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
// Mips does not have "not", so we expand our way
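The pattern itself sits just past this hunk boundary, but the standard expansion, and almost certainly the one meant here, is a NOR against the zero register, since nor(x, 0) == ~(x | 0) == ~x. A one-line sketch, assuming that expansion:

```c++
#include <cstdint>

// Bitwise NOT the MIPS way: nor $dst, $src, $zero.
static uint32_t mipsNot(uint32_t x) {
  return ~(x | 0u);
}
```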
@@ -665,9 +618,15 @@ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
(MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
+def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
(MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+// select patterns with got access
+def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
+ (i32 tglobaladdr:$T), CPURegs:$F),
+ (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
+ (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
// setcc patterns
def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
(SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
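Taken together, the select and setcc patterns above lean on two building blocks: the conditional move MOVN (rd is overwritten with rs only when rt is non-zero) and SLTu against the constant 1. A compact reference model, with illustrative names and assuming those standard MIPS32 semantics:

```c++
#include <cstdint>

// MOVN rd, rs, rt: keep the old value unless the condition register is non-zero.
// select (setne a, b), T, F therefore becomes MOVN F, T, (a ^ b).
static uint32_t movn(uint32_t Old, uint32_t New, uint32_t Cond) {
  return Cond != 0 ? New : Old;
}

// seteq a, b -> SLTu (XOR a, b), 1: the xor is zero exactly when a == b,
// and zero is the only unsigned value below 1.
static uint32_t seteq(uint32_t a, uint32_t b) {
  return (a ^ b) < 1u ? 1u : 0u;
}
```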
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
index 5723f9e..1e8e4fe 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -26,11 +26,11 @@ namespace llvm {
class MipsFunctionInfo : public MachineFunctionInfo {
private:
- /// Holds for each function where on the stack the Frame Pointer must be
+ /// Holds for each function where on the stack the Frame Pointer must be
/// saved. This is used on Prologue and Epilogue to emit FP save/restore
int FPStackOffset;
- /// Holds for each function where on the stack the Return Address must be
+ /// Holds for each function where on the stack the Return Address must be
/// saved. This is used on Prologue and Epilogue to emit RA save/restore
int RAStackOffset;
@@ -51,22 +51,22 @@ private:
: FI(FrameIndex), SPOffset(StackPointerOffset) {}
};
- /// When PIC is used the GP must be saved on the stack on the function
- /// prologue and must be reloaded from this stack location after every
- /// call. A reference to its stack location and frame index must be kept
+ /// When PIC is used the GP must be saved on the stack on the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
/// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
MipsFIHolder GPHolder;
/// On LowerFormalArguments the stack size is unknown, so the Stack
- /// Pointer Offset calculation of "not in register arguments" must be
- /// postponed to emitPrologue.
+ /// Pointer Offset calculation of "not in register arguments" must be
+ /// postponed to emitPrologue.
SmallVector<MipsFIHolder, 16> FnLoadArgs;
bool HasLoadArgs;
- // When VarArgs, we must write registers back to caller stack, preserving
- // on register arguments. Since the stack size is unknown on
+ // When VarArgs, we must write registers back to caller stack, preserving
+ // on register arguments. Since the stack size is unknown on
// LowerFormalArguments, the Stack Pointer Offset calculation must be
- // postponed to emitPrologue.
+ // postponed to emitPrologue.
SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
bool HasStoreVarArgs;
@@ -84,9 +84,9 @@ private:
int VarArgsFrameIndex;
public:
- MipsFunctionInfo(MachineFunction& MF)
- : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
- FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
+ MipsFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+ FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0)
{}
@@ -110,7 +110,7 @@ public:
bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
bool hasLoadArgs() const { return HasLoadArgs; }
- bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
void recordLoadArgsFI(int FI, int SPOffset) {
if (!HasLoadArgs) HasLoadArgs=true;
@@ -123,12 +123,12 @@ public:
void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
if (!hasLoadArgs()) return;
- for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
}
void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
- if (!hasStoreVarArgs()) return;
- for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 69436d2..3719e58 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -117,8 +117,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const
}
BitVector MipsRegisterInfo::
-getReservedRegs(const MachineFunction &MF) const
-{
+getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(Mips::ZERO);
Reserved.set(Mips::AT);
@@ -137,184 +136,6 @@ getReservedRegs(const MachineFunction &MF) const
return Reserved;
}
-//===----------------------------------------------------------------------===//
-//
-// Stack Frame Processing methods
-// +----------------------------+
-//
-// The stack is allocated decrementing the stack pointer on
-// the first instruction of a function prologue. Once decremented,
-// all stack references are done thought a positive offset
-// from the stack/frame pointer, so the stack is considering
-// to grow up! Otherwise terrible hacks would have to be made
-// to get this stack ABI compliant :)
-//
-// The stack frame required by the ABI (after call):
-// Offset
-//
-// 0 ----------
-// 4 Args to pass
-// . saved $GP (used in PIC)
-// . Alloca allocations
-// . Local Area
-// . CPU "Callee Saved" Registers
-// . saved FP
-// . saved RA
-// . FPU "Callee Saved" Registers
-// StackSize -----------
-//
-// Offset - offset from sp after stack allocation on function prologue
-//
-// The sp is the stack pointer subtracted/added from the stack size
-// at the Prologue/Epilogue
-//
-// References to the previous stack (to obtain arguments) are done
-// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1))
-//
-// Examples:
-// - reference to the actual stack frame
-// for any local area var there is smt like : FI >= 0, StackOffset: 4
-// sw REGX, 4(SP)
-//
-// - reference to previous stack frame
-// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16.
-// The emitted instruction will be something like:
-// lw REGX, 16+StackSize(SP)
-//
-// Since the total stack size is unknown on LowerFormalArguments, all
-// stack references (ObjectOffset) created to reference the function
-// arguments, are negative numbers. This way, on eliminateFrameIndex it's
-// possible to detect those references and the offsets are adjusted to
-// their real location.
-//
-//===----------------------------------------------------------------------===//
-
-void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
-{
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
- bool HasGP = MipsFI->needGPSaveRestore();
-
- // Min and Max CSI FrameIndex.
- int MinCSFI = -1, MaxCSFI = -1;
-
- // See the description at MipsMachineFunction.h
- int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
-
- // Replace the dummy '0' SPOffset by the negative offsets, as explained on
- // LowerFormalArguments. Leaving '0' for while is necessary to avoid
- // the approach done by calculateFrameObjectOffsets to the stack frame.
- MipsFI->adjustLoadArgsFI(MFI);
- MipsFI->adjustStoreVarArgsFI(MFI);
-
- // It happens that the default stack frame allocation order does not directly
- // map to the convention used for mips. So we must fix it. We move the callee
- // save register slots after the local variables area, as described in the
- // stack frame above.
- unsigned CalleeSavedAreaSize = 0;
- if (!CSI.empty()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
- }
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
-
- unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
- : (Subtarget.isABI_O32() ? 16 : 0);
-
- // Adjust local variables. They should come on the stack right
- // after the arguments.
- int LastOffsetFI = -1;
- for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (i >= MinCSFI && i <= MaxCSFI)
- continue;
- if (MFI->isDeadObjectIndex(i))
- continue;
- unsigned Offset =
- StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize;
- if (LastOffsetFI == -1)
- LastOffsetFI = i;
- if (Offset > MFI->getObjectOffset(LastOffsetFI))
- LastOffsetFI = i;
- MFI->setObjectOffset(i, Offset);
- }
-
- // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
- // be saved in this CPU Area. This whole area must be aligned to the
- // default Stack Alignment requirements.
- if (LastOffsetFI >= 0)
- StackOffset = MFI->getObjectOffset(LastOffsetFI)+
- MFI->getObjectSize(LastOffsetFI);
- StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-
- for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (!Mips::CPURegsRegisterClass->contains(Reg))
- break;
- MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
- TopCPUSavedRegOff = StackOffset;
- StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
- }
-
- // Stack locations for FP and RA. If only one of them is used,
- // the space must be allocated for both, otherwise no space at all.
- if (hasFP(MF) || MFI->adjustsStack()) {
- // FP stack location
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
- StackOffset);
- MipsFI->setFPStackOffset(StackOffset);
- TopCPUSavedRegOff = StackOffset;
- StackOffset += RegSize;
-
- // SP stack location
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
- StackOffset);
- MipsFI->setRAStackOffset(StackOffset);
- StackOffset += RegSize;
-
- if (MFI->adjustsStack())
- TopCPUSavedRegOff += RegSize;
- }
-
- StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-
- // Adjust FPU Callee Saved Registers Area. This Area must be
- // aligned to the default Stack Alignment requirements.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Mips::CPURegsRegisterClass->contains(Reg))
- continue;
- MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
- TopFPUSavedRegOff = StackOffset;
- StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
- }
- StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
-
- // Update frame info
- MFI->setStackSize(StackOffset);
-
- // Recalculate the final tops offset. The final values must be '0'
- // if there isn't a callee saved register for CPU or FPU, otherwise
- // a negative offset is needed.
- if (TopCPUSavedRegOff >= 0)
- MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
-
- if (TopFPUSavedRegOff >= 0)
- MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
-}
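One idiom worth calling out in the function removed above: the expression (StackOffset+StackAlign-1)/StackAlign*StackAlign, repeated for every area, simply rounds the running offset up to the next multiple of the stack alignment (8 bytes on this target, per the comment in MipsTargetMachine.cpp later in this diff). A named sketch of the same computation, name chosen only for illustration:

```c++
// Round Offset up to the next multiple of Align; works for any positive Align,
// exactly like the in-line expression in the removed adjustMipsStackFrame.
static unsigned alignUp(unsigned Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}
```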
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool MipsRegisterInfo::
-hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
// This function eliminate ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
void MipsRegisterInfo::
@@ -363,106 +184,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
void MipsRegisterInfo::
-emitPrologue(MachineFunction &MF) const
-{
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
-
- // Get the right frame order for Mips.
- adjustMipsStackFrame(MF);
-
- // Get the number of bytes to allocate from the FrameInfo.
- unsigned StackSize = MFI->getStackSize();
-
- // No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
-
- int FPOffset = MipsFI->getFPStackOffset();
- int RAOffset = MipsFI->getRAStackOffset();
-
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
-
- // TODO: check need from GP here.
- if (isPIC && Subtarget.isABI_O32())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
-
- // Adjust stack : addi sp, sp, (-imm)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(-StackSize);
-
- // Save the return address only if the function isnt a leaf one.
- // sw $ra, stack_loc($sp)
- if (MFI->adjustsStack()) {
- BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
- .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
- }
-
- // if framepointer enabled, save it and set it
- // to point to the stack pointer
- if (hasFP(MF)) {
- // sw $fp,stack_loc($sp)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
- .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
-
- // move $fp, $sp
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
- .addReg(Mips::SP).addReg(Mips::ZERO);
- }
-
- // Restore GP from the saved stack location
- if (MipsFI->needGPSaveRestore())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
- .addImm(MipsFI->getGPStackOffset());
-}
-
-void MipsRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- DebugLoc dl = MBBI->getDebugLoc();
-
- // Get the number of bytes from FrameInfo
- int NumBytes = (int) MFI->getStackSize();
-
- // Get the FI's where RA and FP are saved.
- int FPOffset = MipsFI->getFPStackOffset();
- int RAOffset = MipsFI->getRAStackOffset();
-
- // if framepointer enabled, restore it and restore the
- // stack pointer
- if (hasFP(MF)) {
- // move $sp, $fp
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
- .addReg(Mips::FP).addReg(Mips::ZERO);
-
- // lw $fp,stack_loc($sp)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
- .addImm(FPOffset).addReg(Mips::SP);
- }
-
- // Restore the return address only if the function isnt a leaf one.
- // lw $ra, stack_loc($sp)
- if (MFI->adjustsStack()) {
- BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
- .addImm(RAOffset).addReg(Mips::SP);
- }
-
- // adjust stack : insert addi sp, sp, (imm)
- if (NumBytes) {
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(NumBytes);
- }
-}
-
-
-void MipsRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -478,7 +199,9 @@ getRARegister() const {
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? Mips::FP : Mips::SP;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
}
unsigned MipsRegisterInfo::
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 89282f8..a7f4bf9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -44,8 +44,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -56,9 +54,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
/// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 055ff32..49ca5d1 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
// Mips Generic instruction itineraries.
//===----------------------------------------------------------------------===//
-def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [
+def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 2d5fd22..e4f4b33 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -31,7 +31,7 @@ public:
protected:
enum MipsArchEnum {
- Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2
+ Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
};
// Mips architecture version
@@ -100,6 +100,8 @@ public:
const std::string &CPU);
bool isMips1() const { return MipsArchVersion == Mips1; }
+ bool isMips32() const { return MipsArchVersion >= Mips32; }
+ bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
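The new isMips32 accessor above depends on MipsArchEnum being listed oldest to newest, so a plain >= against Mips32 is true for both Mips32 and Mips32r2 (the 64-bit enumerators were dropped in the same hunk). A tiny sketch of that reasoning, with the enum copied from the header and an illustrative helper name:

```c++
enum MipsArchEnum { Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 };

// Enumerators increase in release order, so the comparison used by isMips32()
// accepts exactly Mips32 and Mips32r2.
static bool isAtLeastMips32(MipsArchEnum V) {
  return V >= Mips32;
}
```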
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index ad3eb9e..7a2dd1f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -30,18 +30,18 @@ extern "C" void LLVMInitializeMipsTarget() {
// The stack is always 8 byte aligned
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
-// offset from the stack/frame pointer, using StackGrowsUp enables
+// offset from the stack/frame pointer, using StackGrowsUp enables
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
bool isLittle=false):
LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, isLittle),
+ Subtarget(TT, FS, isLittle),
DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") :
- std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
- InstrInfo(*this),
- FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
TLInfo(*this), TSInfo(*this) {
// Abicall enables PIC by default
if (getRelocationModel() == Reloc::Default) {
@@ -57,20 +57,20 @@ MipselTargetMachine(const Target &T, const std::string &TT,
const std::string &FS) :
MipsTargetMachine(T, TT, FS, true) {}
-// Install an instruction selector pass using
+// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
{
PM.add(createMipsISelDag(*this));
return false;
}
-// Implemented by targets that want to run passes immediately before
-// machine code is emitted. return true if -print-machineinstrs should
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MipsTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
{
PM.add(createMipsDelaySlotFillerPass(*this));
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
index d63976f..43ab798 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -17,39 +17,40 @@
#include "MipsSubtarget.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsFrameLowering.h"
#include "MipsSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class formatted_raw_ostream;
-
+
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
MipsInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
+ MipsFrameLowering FrameLowering;
MipsTargetLowering TLInfo;
MipsSelectionDAGInfo TSInfo;
public:
MipsTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool isLittle);
-
- virtual const MipsInstrInfo *getInstrInfo() const
+
+ virtual const MipsInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const
- { return &FrameInfo; }
- virtual const MipsSubtarget *getSubtargetImpl() const
+ virtual const TargetFrameLowering *getFrameLowering() const
+ { return &FrameLowering; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
{ return &Subtarget; }
- virtual const TargetData *getTargetData() const
+ virtual const TargetData *getTargetData() const
{ return &DataLayout;}
virtual const MipsRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual const MipsTargetLowering *getTargetLowering() const {
+ virtual const MipsTargetLowering *getTargetLowering() const {
return &TLInfo;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index 405f419..cf5d1b5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
static cl::opt<unsigned>
@@ -25,21 +26,21 @@ SSThreshold("mips-ssection-threshold", cl::Hidden,
void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
-
+
SmallDataSection =
- getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getDataRel());
-
+
SmallBSSSection =
- getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
-
+
}
-// An address must be loaded from a small section if its size is less than the
-// small section size threshold. Data in this section must be addressed using
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section must be addressed using
// gp_rel operator.
static bool IsInSmallSection(uint64_t Size) {
return Size > 0 && Size <= SSThreshold;
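IsInSmallSection above is the gate for .sdata/.sbss placement: only a non-empty global whose size does not exceed the -mips-ssection-threshold option declared earlier in this file ends up there, where the gp_rel patterns added to MipsInstrInfo.td can reach it with a single 16-bit offset from $gp. A trivial restatement, with the threshold left as a parameter because its default value is outside this hunk and the function name is illustrative:

```c++
#include <cstdint>

// Mirror of IsInSmallSection: small, non-empty globals qualify for the
// gp-relative small data/bss sections.
static bool fitsSmallSection(uint64_t Size, uint64_t Threshold) {
  return Size > 0 && Size <= Threshold;
}
```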
@@ -49,7 +50,7 @@ bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
const TargetMachine &TM) const {
if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
return false;
-
+
return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
}
@@ -68,11 +69,11 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
if (!GVA)
return false;
-
+
// We can only do this for datarel or BSS objects for now.
if (!Kind.isBSS() && !Kind.isDataRel())
return false;
-
+
// If this is a internal constant string, there is a special
// section for it, but not in small data/bss.
if (Kind.isMergeable1ByteCString())
@@ -89,13 +90,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
// TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
// sections?
-
+
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
-
+
// Otherwise, we work the same as ELF.
return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
}
diff --git a/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
deleted file mode 100644
index b665817..0000000
--- a/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ /dev/null
@@ -1,512 +0,0 @@
-//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PIC16 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16ABINames.h"
-#include "PIC16AsmPrinter.h"
-#include "PIC16Section.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include <cstring>
-using namespace llvm;
-
-#include "PIC16GenAsmWriter.inc"
-
-PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-: AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) {
- PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo());
- PTOF = &getObjFileLowering();
-}
-
-void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
-
- OutStreamer.EmitRawText(OS.str());
-}
-
-static int getFunctionColor(const Function *F) {
- if (F->hasSection()) {
- std::string Sectn = F->getSection();
- std::string StrToFind = "Overlay=";
- std::string::size_type Pos = Sectn.find(StrToFind);
-
- // Retreive the color number if the key is found.
- if (Pos != std::string::npos) {
- Pos += StrToFind.length();
- std::string Color = "";
- char c = Sectn.at(Pos);
- // A Color can only consist of digits.
- while (c >= '0' && c<= '9') {
- Color.append(1,c);
- Pos++;
- if (Pos >= Sectn.length())
- break;
- c = Sectn.at(Pos);
- }
- return atoi(Color.c_str());
- }
- }
-
- // Color was not set for function, so return -1.
- return -1;
-}
-
-// Color the Auto section of the given function.
-void PIC16AsmPrinter::ColorAutoSection(const Function *F) {
- std::string SectionName = PAN::getAutosSectionName(CurrentFnSym->getName());
- PIC16Section* Section = PTOF->findPIC16Section(SectionName);
- if (Section != NULL) {
- int Color = getFunctionColor(F);
- if (Color >= 0)
- Section->setColor(Color);
- }
-}
-
-
-/// runOnMachineFunction - This emits the frame section, autos section and
-/// assembly for each instruction. Also takes care of function begin debug
-/// directive and file begin debug directive (if required) for the function.
-///
-bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- // This calls the base class function required to be called at beginning
- // of runOnMachineFunction.
- SetupMachineFunction(MF);
-
- // Put the color information from function to its auto section.
- const Function *F = MF.getFunction();
- ColorAutoSection(F);
-
- // Emit the function frame (args and temps).
- EmitFunctionFrame(MF);
-
- DbgInfo.BeginFunction(MF);
-
- // Now emit the instructions of function in its code section.
- const MCSection *fCodeSection =
- getObjFileLowering().SectionForCode(CurrentFnSym->getName(),
- PAN::isISR(F->getSection()));
-
- // Start the Code Section.
- OutStreamer.SwitchSection(fCodeSection);
-
- // Emit the frame address of the function at the beginning of code.
- OutStreamer.EmitRawText("\tretlw low(" +
- Twine(PAN::getFrameLabel(CurrentFnSym->getName())) +
- ")");
- OutStreamer.EmitRawText("\tretlw high(" +
- Twine(PAN::getFrameLabel(CurrentFnSym->getName())) +
- ")");
-
- // Emit function start label.
- OutStreamer.EmitLabel(CurrentFnSym);
-
- DebugLoc CurDL;
- // Print out code for the function.
- for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
-
- // Print a label for the basic block.
- if (I != MF.begin())
- EmitBasicBlockStart(I);
-
- // Print a basic block.
- for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
- II != E; ++II) {
- // Emit the line directive if source line changed.
- DebugLoc DL = II->getDebugLoc();
- if (!DL.isUnknown() && DL != CurDL) {
- DbgInfo.ChangeDebugLoc(MF, DL);
- CurDL = DL;
- }
-
- // Print the assembly for the instruction.
- EmitInstruction(II);
- }
- }
-
- // Emit function end debug directives.
- DbgInfo.EndFunction(MF);
-
- return false; // we didn't modify anything.
-}
-
-
-// printOperand - print operand of insn.
-void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- const Function *F = MI->getParent()->getParent()->getFunction();
-
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- {
- // For indirect load/store insns, the fsr name is printed as INDF.
- std::string RegName = getRegisterName(MO.getReg());
- if ((MI->getOpcode() == PIC16::load_indirect) ||
- (MI->getOpcode() == PIC16::store_indirect))
- RegName.replace (0, 3, "INDF");
- O << RegName;
- }
- return;
-
- case MachineOperand::MO_Immediate:
- O << (int)MO.getImm();
- return;
-
- case MachineOperand::MO_GlobalAddress: {
- MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
- // FIXME: currently we do not have a memcpy def coming in the module
- // by any chance, as we do not link in those as .bc lib. So these calls
- // are always external and it is safe to emit an extern.
- if (PAN::isMemIntrinsic(Sym->getName()))
- LibcallDecls.insert(Sym->getName());
-
- O << *Sym;
- break;
- }
- case MachineOperand::MO_ExternalSymbol: {
- const char *Sname = MO.getSymbolName();
- std::string Printname = Sname;
-
- // Intrinsic stuff needs to be renamed if we are printing IL fn.
- if (PAN::isIntrinsicStuff(Printname)) {
- if (PAN::isISR(F->getSection())) {
- Printname = PAN::Rename(Sname);
- }
- // Record these decls, we need to print them in asm as extern.
- LibcallDecls.insert(Printname);
- }
-
- O << Printname;
- break;
- }
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
-
- default:
- llvm_unreachable(" Operand type not supported.");
- }
-}
-
-/// printCCOperand - Print the cond code operand.
-///
-void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- int CC = (int)MI->getOperand(opNum).getImm();
- O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
-}
-
-/// printLibcallDecls - print the extern declarations for compiler
-/// intrinsics.
-///
-void PIC16AsmPrinter::printLibcallDecls() {
- // If no libcalls used, return.
- if (LibcallDecls.empty()) return;
-
- OutStreamer.AddComment("External decls for libcalls - BEGIN");
- OutStreamer.AddBlankLine();
-
- for (std::set<std::string>::const_iterator I = LibcallDecls.begin(),
- E = LibcallDecls.end(); I != E; I++)
- OutStreamer.EmitRawText(MAI->getExternDirective() + Twine(*I));
-
- OutStreamer.AddComment("External decls for libcalls - END");
- OutStreamer.AddBlankLine();
-}
-
-/// doInitialization - Perform Module level initializations here.
-/// One task that we do here is to sectionize all global variables.
-/// The MemSelOptimizer pass depends on the sectionizing.
-///
-bool PIC16AsmPrinter::doInitialization(Module &M) {
- bool Result = AsmPrinter::doInitialization(M);
-
- // Every asmbly contains these std headers.
- OutStreamer.EmitRawText(StringRef("\n#include p16f1xxx.inc"));
- OutStreamer.EmitRawText(StringRef("#include stdmacros.inc"));
-
- // Set the section names for all globals.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
-
- // Record External Var Decls.
- if (I->isDeclaration()) {
- ExternalVarDecls.push_back(I);
- continue;
- }
-
- // Record Exteranl Var Defs.
- if (I->hasExternalLinkage() || I->hasCommonLinkage()) {
- ExternalVarDefs.push_back(I);
- }
-
- // Sectionify actual data.
- if (!I->hasAvailableExternallyLinkage()) {
- const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM);
-
- I->setSection(((const PIC16Section *)S)->getName());
- }
- }
-
- DbgInfo.BeginModule(M);
- EmitFunctionDecls(M);
- EmitUndefinedVars(M);
- EmitDefinedVars(M);
- EmitIData(M);
- EmitUData(M);
- EmitRomData(M);
- EmitSharedUdata(M);
- EmitUserSections(M);
- return Result;
-}
-
-/// Emit extern decls for functions imported from other modules, and emit
-/// global declarations for function defined in this module and which are
-/// available to other modules.
-///
-void PIC16AsmPrinter::EmitFunctionDecls(Module &M) {
- // Emit declarations for external functions.
- OutStreamer.AddComment("Function Declarations - BEGIN");
- OutStreamer.AddBlankLine();
- for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
- if (I->isIntrinsic() || I->getName() == "@abort")
- continue;
-
- if (!I->isDeclaration() && !I->hasExternalLinkage())
- continue;
-
- MCSymbol *Sym = Mang->getSymbol(I);
-
- // Do not emit memcpy, memset, and memmove here.
- // Calls to these routines can be generated in two ways,
- // 1. User calling the standard lib function
- // 2. Codegen generating these calls for llvm intrinsics.
- // In the first case a prototype is alread availale, while in
- // second case the call is via and externalsym and the prototype is missing.
- // So declarations for these are currently always getting printing by
- // tracking both kind of references in printInstrunction.
- if (I->isDeclaration() && PAN::isMemIntrinsic(Sym->getName())) continue;
-
- const char *directive = I->isDeclaration() ? MAI->getExternDirective() :
- MAI->getGlobalDirective();
-
- OutStreamer.EmitRawText(directive + Twine(Sym->getName()));
- OutStreamer.EmitRawText(directive +
- Twine(PAN::getRetvalLabel(Sym->getName())));
- OutStreamer.EmitRawText(directive +
- Twine(PAN::getArgsLabel(Sym->getName())));
- }
-
- OutStreamer.AddComment("Function Declarations - END");
- OutStreamer.AddBlankLine();
-
-}
-
-// Emit variables imported from other Modules.
-void PIC16AsmPrinter::EmitUndefinedVars(Module &M) {
- std::vector<const GlobalVariable*> Items = ExternalVarDecls;
- if (!Items.size()) return;
-
- OutStreamer.AddComment("Imported Variables - BEGIN");
- OutStreamer.AddBlankLine();
- for (unsigned j = 0; j < Items.size(); j++)
- OutStreamer.EmitRawText(MAI->getExternDirective() +
- Twine(Mang->getSymbol(Items[j])->getName()));
-
- OutStreamer.AddComment("Imported Variables - END");
- OutStreamer.AddBlankLine();
-}
-
-// Emit variables defined in this module and are available to other modules.
-void PIC16AsmPrinter::EmitDefinedVars(Module &M) {
- std::vector<const GlobalVariable*> Items = ExternalVarDefs;
- if (!Items.size()) return;
-
- OutStreamer.AddComment("Exported Variables - BEGIN");
- OutStreamer.AddBlankLine();
-
- for (unsigned j = 0; j < Items.size(); j++)
- OutStreamer.EmitRawText(MAI->getGlobalDirective() +
- Twine(Mang->getSymbol(Items[j])->getName()));
- OutStreamer.AddComment("Exported Variables - END");
- OutStreamer.AddBlankLine();
-}
-
-// Emit initialized data placed in ROM.
-void PIC16AsmPrinter::EmitRomData(Module &M) {
- EmitSingleSection(PTOF->ROMDATASection());
-}
-
-// Emit Shared section udata.
-void PIC16AsmPrinter::EmitSharedUdata(Module &M) {
- EmitSingleSection(PTOF->SHAREDUDATASection());
-}
-
-bool PIC16AsmPrinter::doFinalization(Module &M) {
- EmitAllAutos(M);
- printLibcallDecls();
- DbgInfo.EndModule(M);
- OutStreamer.EmitRawText(StringRef("\tEND"));
- return AsmPrinter::doFinalization(M);
-}
-
-void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
- const Function *F = MF.getFunction();
- const TargetData *TD = TM.getTargetData();
- PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-
- // Emit the data section name.
-
- PIC16Section *fPDataSection =
- const_cast<PIC16Section *>(getObjFileLowering().
- SectionForFrame(CurrentFnSym->getName()));
-
- fPDataSection->setColor(getFunctionColor(F));
- OutStreamer.SwitchSection(fPDataSection);
-
- // Emit function frame label
- OutStreamer.EmitRawText(PAN::getFrameLabel(CurrentFnSym->getName()) +
- Twine(":"));
-
- const Type *RetType = F->getReturnType();
- unsigned RetSize = 0;
- if (RetType->getTypeID() != Type::VoidTyID)
- RetSize = TD->getTypeAllocSize(RetType);
-
- //Emit function return value space
- // FIXME: Do not emit RetvalLable when retsize is zero. To do this
- // we will need to avoid printing a global directive for Retval label
- // in emitExternandGloblas.
- if(RetSize > 0)
- OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) +
- Twine(" RES ") + Twine(RetSize));
- else
- OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) +
- Twine(":"));
-
- // Emit variable to hold the space for function arguments
- unsigned ArgSize = 0;
- for (Function::const_arg_iterator argi = F->arg_begin(),
- arge = F->arg_end(); argi != arge ; ++argi) {
- const Type *Ty = argi->getType();
- ArgSize += TD->getTypeAllocSize(Ty);
- }
-
- OutStreamer.EmitRawText(PAN::getArgsLabel(CurrentFnSym->getName()) +
- Twine(" RES ") + Twine(ArgSize));
-
- // Emit temporary space
- int TempSize = FuncInfo->getTmpSize();
- if (TempSize > 0)
- OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) +
- Twine(" RES ") + Twine(TempSize));
-}
-
-
-void PIC16AsmPrinter::EmitInitializedDataSection(const PIC16Section *S) {
- /// Emit Section header.
- OutStreamer.SwitchSection(S);
-
- std::vector<const GlobalVariable*> Items = S->Items;
- for (unsigned j = 0; j < Items.size(); j++) {
- Constant *C = Items[j]->getInitializer();
- int AddrSpace = Items[j]->getType()->getAddressSpace();
- OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName());
- EmitGlobalConstant(C, AddrSpace);
- }
-}
-
-// Print all IDATA sections.
-void PIC16AsmPrinter::EmitIData(Module &M) {
- EmitSectionList (M, PTOF->IDATASections());
-}
-
-void PIC16AsmPrinter::
-EmitUninitializedDataSection(const PIC16Section *S) {
- const TargetData *TD = TM.getTargetData();
- OutStreamer.SwitchSection(S);
- std::vector<const GlobalVariable*> Items = S->Items;
- for (unsigned j = 0; j < Items.size(); j++) {
- Constant *C = Items[j]->getInitializer();
- const Type *Ty = C->getType();
- unsigned Size = TD->getTypeAllocSize(Ty);
- OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName() +
- Twine(" RES ") + Twine(Size));
- }
-}
-
-// Print all UDATA sections.
-void PIC16AsmPrinter::EmitUData(Module &M) {
- EmitSectionList (M, PTOF->UDATASections());
-}
-
-// Print all USER sections.
-void PIC16AsmPrinter::EmitUserSections(Module &M) {
- EmitSectionList (M, PTOF->USERSections());
-}
-
-// Print all AUTO sections.
-void PIC16AsmPrinter::EmitAllAutos(Module &M) {
- EmitSectionList (M, PTOF->AUTOSections());
-}
-
-extern "C" void LLVMInitializePIC16AsmPrinter() {
- RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target);
-}
-
-// Emit one data section using correct section emitter based on section type.
-void PIC16AsmPrinter::EmitSingleSection(const PIC16Section *S) {
- if (S == NULL) return;
-
- switch (S->getType()) {
- default: llvm_unreachable ("unknow user section type");
- case UDATA:
- case UDATA_SHR:
- case UDATA_OVR:
- EmitUninitializedDataSection(S);
- break;
- case IDATA:
- case ROMDATA:
- EmitInitializedDataSection(S);
- break;
- }
-}
-
-// Emit a list of sections.
-void PIC16AsmPrinter::
-EmitSectionList(Module &M, const std::vector<PIC16Section *> &SList) {
- for (unsigned i = 0; i < SList.size(); i++) {
- // Exclude llvm specific metadata sections.
- if (SList[i]->getName().find("llvm.") != std::string::npos)
- continue;
- OutStreamer.AddBlankLine();
- EmitSingleSection(SList[i]);
- }
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
deleted file mode 100644
index aa2e1f4..0000000
--- a/contrib/llvm/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to PIC16 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16ASMPRINTER_H
-#define PIC16ASMPRINTER_H
-
-#include "PIC16.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16DebugInfo.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetObjectFile.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
-#include <list>
-#include <set>
-#include <string>
-
-namespace llvm {
- class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter {
- public:
- explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
- private:
- virtual const char *getPassName() const {
- return "PIC16 Assembly Printer";
- }
-
- const PIC16TargetObjectFile &getObjFileLowering() const {
- return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering();
- }
-
- bool runOnMachineFunction(MachineFunction &F);
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void EmitInstruction(const MachineInstr *MI);
- void EmitFunctionDecls (Module &M);
- void EmitUndefinedVars (Module &M);
- void EmitDefinedVars (Module &M);
- void EmitIData (Module &M);
- void EmitUData (Module &M);
- void EmitAllAutos (Module &M);
- void EmitRomData (Module &M);
- void EmitSharedUdata(Module &M);
- void EmitUserSections (Module &M);
- void EmitFunctionFrame(MachineFunction &MF);
- void printLibcallDecls();
- void EmitUninitializedDataSection(const PIC16Section *S);
- void EmitInitializedDataSection(const PIC16Section *S);
- void EmitSingleSection(const PIC16Section *S);
- void EmitSectionList(Module &M,
- const std::vector< PIC16Section *> &SList);
- void ColorAutoSection(const Function *F);
- protected:
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
-
- /// EmitGlobalVariable - Emit the specified global variable and its
- /// initializer to the output stream.
- virtual void EmitGlobalVariable(const GlobalVariable *GV) {
- // PIC16 doesn't use normal hooks for this.
- }
-
- private:
- const PIC16TargetObjectFile *PTOF;
- PIC16DbgInfo DbgInfo;
- const PIC16MCAsmInfo *PMAI;
- std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls.
- std::vector<const GlobalVariable *> ExternalVarDecls;
- std::vector<const GlobalVariable *> ExternalVarDefs;
- };
-} // end of namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16.h b/contrib/llvm/lib/Target/PIC16/PIC16.h
deleted file mode 100644
index 08bb3e6..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16.h
+++ /dev/null
@@ -1,134 +0,0 @@
-//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM PIC16 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16_H
-#define LLVM_TARGET_PIC16_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <sstream>
-#include <cstring>
-#include <string>
-#include <vector>
-
-namespace llvm {
- class PIC16TargetMachine;
- class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
-
-namespace PIC16CC {
- enum CondCodes {
- EQ,
- NE,
- LT,
- LE,
- GT,
- GE,
- ULT,
- UGT,
- ULE,
- UGE
- };
-}
-
- enum PIC16SectionType {
- CODE,
- UDATA,
- IDATA,
- ROMDATA,
- UDATA_OVR,
- UDATA_SHR
- };
-
- class ESNames {
- std::vector<char*> stk;
- ESNames() {}
- public:
- ~ESNames() {
- while (!stk.empty())
- {
- char* p = stk.back();
- delete [] p;
- stk.pop_back();
- }
- }
-
- // External symbol names require memory to live till the program end.
- // So we have to allocate it and keep. Push all such allocations into a
- // vector so that they get freed up on termination.
- inline static const char *createESName (const std::string &name) {
- static ESNames esn;
- char *tmpName = new char[name.size() + 1];
- memcpy(tmpName, name.c_str(), name.size() + 1);
- esn.stk.push_back(tmpName);
- return tmpName;
- }
-
- };
-
- inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case PIC16CC::NE: return "ne";
- case PIC16CC::EQ: return "eq";
- case PIC16CC::LT: return "lt";
- case PIC16CC::ULT: return "lt";
- case PIC16CC::LE: return "le";
- case PIC16CC::ULE: return "le";
- case PIC16CC::GT: return "gt";
- case PIC16CC::UGT: return "gt";
- case PIC16CC::GE: return "ge";
- case PIC16CC::UGE: return "ge";
- }
- }
-
- inline static bool isSignedComparison(PIC16CC::CondCodes CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case PIC16CC::NE:
- case PIC16CC::EQ:
- case PIC16CC::LT:
- case PIC16CC::LE:
- case PIC16CC::GE:
- case PIC16CC::GT:
- return true;
- case PIC16CC::ULT:
- case PIC16CC::UGT:
- case PIC16CC::ULE:
- case PIC16CC::UGE:
- return false; // condition codes for unsigned comparison.
- }
- }
-
-
-
- FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
- // Banksel optimizer pass.
- FunctionPass *createPIC16MemSelOptimizerPass();
-
- extern Target ThePIC16Target;
- extern Target TheCooperTarget;
-
-} // end namespace llvm;
-
-// Defines symbolic names for PIC16 registers. This defines a mapping from
-// register name to register number.
-#include "PIC16GenRegisterNames.inc"
-
-// Defines symbolic names for the PIC16 instructions.
-#include "PIC16GenInstrNames.inc"
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16.td b/contrib/llvm/lib/Target/PIC16/PIC16.td
deleted file mode 100644
index b2b9b1c..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16.td
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is the top level entry point for the PIC16 target.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-include "PIC16RegisterInfo.td"
-include "PIC16InstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features.
-//===----------------------------------------------------------------------===//
-def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true",
- "PIC16 Cooper ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// PIC16 supported processors.
-//===----------------------------------------------------------------------===//
-
-def : Processor<"generic", NoItineraries, []>;
-def : Processor<"cooper", NoItineraries, [FeatureCooper]>;
-
-
-def PIC16InstrInfo : InstrInfo {}
-
-def PIC16 : Target {
- let InstructionSet = PIC16InstrInfo;
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16ABINames.h b/contrib/llvm/lib/Target/PIC16/PIC16ABINames.h
deleted file mode 100644
index 4c1a8da..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16ABINames.h
+++ /dev/null
@@ -1,399 +0,0 @@
-//===-- PIC16ABINames.h - PIC16 Naming conventios for ABI----- --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions to manage ABI Naming conventions for PIC16.
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16ABINAMES_H
-#define LLVM_TARGET_PIC16ABINAMES_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <sstream>
-#include <cstring>
-#include <string>
-
-namespace llvm {
- class PIC16TargetMachine;
- class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
-
- // A Central class to manage all ABI naming conventions.
- // PAN - [P]ic16 [A]BI [N]ames
- class PAN {
- public:
- // Map the name of the symbol to its section name.
- // Current ABI:
- // -----------------------------------------------------
-  // ALL Names are prefixed with the symbol '@'.
- // ------------------------------------------------------
- // Global variables do not have any '.' in their names.
-  // These are mainly function names and global variable names.
- // Example - @foo, @i
- // Static local variables - @<func>.<var>
- // -------------------------------------------------------
- // Functions and auto variables.
- // Names are mangled as <prefix><funcname>.<tag>.<varname>
- // Where <prefix> is '@' and <tag> is any one of
- // the following
- // .auto. - an automatic var of a function.
-  //     .temp. - temporary data of a function.
- // .ret. - return value label for a function.
- // .frame. - Frame label for a function where retval, args
- // and temps are stored.
- // .args. - Label used to pass arguments to a direct call.
- // Example - Function name: @foo
- // Its frame: @foo.frame.
- // Its retval: @foo.ret.
- // Its local vars: @foo.auto.a
- // Its temp data: @foo.temp.
- // Its arg passing: @foo.args.
- //----------------------------------------------
- // Libcall - compiler generated libcall names must start with .lib.
- // This id will be used to emit extern decls for libcalls.
- // Example - libcall name: @.lib.sra.i8
- // To pass args: @.lib.sra.i8.args.
- // To return val: @.lib.sra.i8.ret.
- //----------------------------------------------
- // SECTION Names
- // uninitialized globals - @udata.<num>.#
- // initialized globals - @idata.<num>.#
- // Program memory data - @romdata.#
- // Variables with user defined section name - <user_defined_section>
- // Variables with user defined address - @<var>.user_section.<address>.#
- // Function frame - @<func>.frame_section.
- // Function autos - @<func>.autos_section.
- // Overlay sections - @<color>.##
- // Declarations - Enclosed in comments. No section for them.
- //----------------------------------------------------------
-
- // Tags used to mangle different names.
- enum TAGS {
- PREFIX_SYMBOL,
- GLOBAL,
- STATIC_LOCAL,
- AUTOS_LABEL,
- FRAME_LABEL,
- RET_LABEL,
- ARGS_LABEL,
- TEMPS_LABEL,
-
- LIBCALL,
-
- FRAME_SECTION,
- AUTOS_SECTION,
- CODE_SECTION,
- USER_SECTION
- };
-
- // Textual names of the tags.
- inline static const char *getTagName(TAGS tag) {
- switch (tag) {
- default: return "";
- case PREFIX_SYMBOL: return "@";
- case AUTOS_LABEL: return ".auto.";
- case FRAME_LABEL: return ".frame.";
- case TEMPS_LABEL: return ".temp.";
- case ARGS_LABEL: return ".args.";
- case RET_LABEL: return ".ret.";
- case LIBCALL: return ".lib.";
- case FRAME_SECTION: return ".frame_section.";
- case AUTOS_SECTION: return ".autos_section.";
- case CODE_SECTION: return ".code_section.";
- case USER_SECTION: return ".user_section.";
- }
- }
-
- // Get tag type for the Symbol.
- inline static TAGS getSymbolTag(const std::string &Sym) {
- if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos)
- return TEMPS_LABEL;
-
- if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos)
- return FRAME_LABEL;
-
- if (Sym.find(getTagName(RET_LABEL)) != std::string::npos)
- return RET_LABEL;
-
- if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos)
- return ARGS_LABEL;
-
- if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos)
- return AUTOS_LABEL;
-
- if (Sym.find(getTagName(LIBCALL)) != std::string::npos)
- return LIBCALL;
-
-    // It does not have any Tag. So it's a true global or static local.
- if (Sym.find(".") == std::string::npos)
- return GLOBAL;
-
-    // If a '.' is there, then it may be a static local.
- // We should mangle these as well in clang.
- if (Sym.find(".") != std::string::npos)
- return STATIC_LOCAL;
-
- assert (0 && "Could not determine Symbol's tag");
- return PREFIX_SYMBOL; // Silence warning when assertions are turned off.
- }
-
- // addPrefix - add prefix symbol to a name if there isn't one already.
- inline static std::string addPrefix (const std::string &Name) {
- std::string prefix = getTagName (PREFIX_SYMBOL);
-
- // If this name already has a prefix, nothing to do.
- if (Name.compare(0, prefix.size(), prefix) == 0)
- return Name;
-
- return prefix + Name;
- }
-
- // Get mangled func name from a mangled sym name.
- // In all cases func name is the first component before a '.'.
- static inline std::string getFuncNameForSym(const std::string &Sym1) {
- assert (getSymbolTag(Sym1) != GLOBAL && "not belongs to a function");
-
- std::string Sym = addPrefix(Sym1);
-
- // Position of the . after func name. That's where func name ends.
- size_t func_name_end = Sym.find ('.');
-
- return Sym.substr (0, func_name_end);
- }
-
- // Get Frame start label for a func.
- static std::string getFrameLabel(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(FRAME_LABEL);
- return Func1 + tag;
- }
-
- // Get the retval label for the given function.
- static std::string getRetvalLabel(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(RET_LABEL);
- return Func1 + tag;
- }
-
- // Get the argument label for the given function.
- static std::string getArgsLabel(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(ARGS_LABEL);
- return Func1 + tag;
- }
-
- // Get the tempdata label for the given function.
- static std::string getTempdataLabel(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(TEMPS_LABEL);
- return Func1 + tag;
- }
-
- static std::string getFrameSectionName(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(FRAME_SECTION);
- return Func1 + tag + "#";
- }
-
- static std::string getAutosSectionName(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(AUTOS_SECTION);
- return Func1 + tag + "#";
- }
-
- static std::string getCodeSectionName(const std::string &Func) {
- std::string Func1 = addPrefix(Func);
- std::string tag = getTagName(CODE_SECTION);
- return Func1 + tag + "#";
- }
-
- static std::string getUserSectionName(const std::string &Name) {
-      std::string sname = addPrefix(Name);
- std::string tag = getTagName(USER_SECTION);
- return sname + tag + "#";
- }
-
- // udata, romdata and idata section names are generated by a given number.
- // @udata.<num>.#
- static std::string getUdataSectionName(unsigned num,
- std::string prefix = "") {
- std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num
- << ".#";
- return o.str();
- }
-
- static std::string getRomdataSectionName() {
- return "romdata.#";
- }
-
- static std::string getSharedUDataSectionName() {
- std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << "udata_shr" << ".#";
- return o.str();
- }
-
- static std::string getRomdataSectionName(unsigned num,
- std::string prefix = "") {
- std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num
- << ".#";
- return o.str();
- }
-
- static std::string getIdataSectionName(unsigned num,
- std::string prefix = "") {
- std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num
- << ".#";
- return o.str();
- }
-
- inline static bool isLocalName (const std::string &Name) {
- if (getSymbolTag(Name) == AUTOS_LABEL)
- return true;
-
- return false;
- }
-
-
- inline static bool isMemIntrinsic (const std::string &Name) {
- if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
- Name.compare("@memmove") == 0) {
- return true;
- }
-
- return false;
- }
-
- // Currently names of libcalls are assigned during TargetLowering
-  // object construction. There is no provision to change them when the
-  // code for an IL function is being generated.
- // So we have to change these names while printing assembly.
- // We need to do that mainly for names related to intrinsics. This
- // function returns true if a name needs to be cloned.
- inline static bool isIntrinsicStuff(const std::string &Name) {
-    // Return true if the name contains the LIBCALL marker or a MemIntrinsic.
-    // These are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself.
- if ((Name.find(getTagName(LIBCALL)) != std::string::npos)
- || isMemIntrinsic(Name))
- return true;
-
- return false;
- }
-
- // Rename the name for IL.
- inline static std::string Rename(const std::string &Name) {
- std::string Newname;
-    // If it's a label (LIBCALL+Func+LABEL), change it to
- // (LIBCALL+Func+IL+LABEL).
- TAGS id = getSymbolTag(Name);
- if (id == ARGS_LABEL || id == RET_LABEL) {
- std::size_t pos = Name.find(getTagName(id));
- Newname = Name.substr(0, pos) + ".IL" + getTagName(id);
- return Newname;
- }
-
- // Else, just append IL to name.
- return Name + ".IL";
- }
-
-
-
-
- inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
- if (! isLocalName(Var)) return false;
-
- std::string Func1 = addPrefix(Func);
-    // Extract the func name of the variable.
- const std::string &fname = getFuncNameForSym(Var);
-
- if (fname.compare(Func1) == 0)
- return true;
-
- return false;
- }
-
-
- // Get the section for the given external symbol names.
- // This tries to find the type (Tag) of the symbol from its mangled name
- // and return appropriate section name for it.
- static inline std::string getSectionNameForSym(const std::string &Sym1) {
- std::string Sym = addPrefix(Sym1);
-
- std::string SectionName;
-
- std::string Fname = getFuncNameForSym (Sym);
- TAGS id = getSymbolTag (Sym);
-
- switch (id) {
- default : assert (0 && "Could not determine external symbol type");
- case FRAME_LABEL:
- case RET_LABEL:
- case TEMPS_LABEL:
- case ARGS_LABEL: {
- return getFrameSectionName(Fname);
- }
- case AUTOS_LABEL: {
- return getAutosSectionName(Fname);
- }
- }
- }
-
- /// Return Overlay Name for the section.
- /// The ABI Convention is: @<Color>.##.<section_tag>
-    /// The section_tag is retrieved from the SectName parameter,
-    /// and Color is passed in as a parameter.
- static inline std::string getOverlayName(std::string SectName, int Color) {
- // FIXME: Only autos_section and frame_section are colored.
- // So check and assert if the passed SectName does not have AUTOS_SECTION
- // or FRAME_SECTION tag in it.
- std::ostringstream o;
- o << getTagName(PREFIX_SYMBOL) << Color << ".##"
- << SectName.substr(SectName.find("."));
-
- return o.str();
- }
-
- // Return true if the current function is an ISR
- inline static bool isISR(const std::string SectName) {
- if (SectName.find("interrupt") != std::string::npos)
- return true;
-
- return false;
- }
-
-    // Return the address where the ISR starts in ROM.
- inline static std::string getISRAddr(void) {
- return "0x4";
- }
-
- // Returns the name of clone of a function.
- static std::string getCloneFnName(const std::string &Func) {
- return (Func + ".IL");
- }
-
- // Returns the name of clone of a variable.
- static std::string getCloneVarName(const std::string &Fn,
- const std::string &Var) {
- std::string cloneVarName = Var;
- // These vars are named like fun.auto.var.
-      // Just replace the function name with the clone function name.
- std::string cloneFnName = getCloneFnName(Fn);
- cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName);
- return cloneVarName;
- }
- }; // class PAN.
-} // end namespace llvm;
-
-#endif
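The PAN class deleted above reduces the whole PIC16 ABI naming scheme to string concatenation: every symbol gets the '@' prefix, per-function labels append a tag such as ".frame." or ".ret.", and compiler-generated libcalls carry the ".lib." marker. A minimal standalone C++ sketch that reproduces the examples given in the header's comment block (it mirrors addPrefix, getFrameLabel and friends, but is not the original class):

#include <cassert>
#include <iostream>
#include <string>

// Mirrors PAN::addPrefix: prepend '@' (PREFIX_SYMBOL) unless already present.
static std::string addPrefix(const std::string &Name) {
  const std::string Prefix = "@";
  if (Name.compare(0, Prefix.size(), Prefix) == 0)
    return Name;
  return Prefix + Name;
}

// Per-function labels are <prefix><funcname><tag>, as in getFrameLabel etc.
static std::string frameLabel(const std::string &F)  { return addPrefix(F) + ".frame."; }
static std::string retvalLabel(const std::string &F) { return addPrefix(F) + ".ret."; }
static std::string argsLabel(const std::string &F)   { return addPrefix(F) + ".args."; }

// Libcall names carry the LIBCALL tag ".lib." right after the prefix.
static std::string libcallName(const std::string &Base) { return "@.lib." + Base; }

int main() {
  // Matches the examples in the deleted comment block:
  //   @foo.frame., @foo.ret., @foo.args., @.lib.sra.i8
  assert(frameLabel("foo")     == "@foo.frame.");
  assert(retvalLabel("foo")    == "@foo.ret.");
  assert(argsLabel("foo")      == "@foo.args.");
  assert(libcallName("sra.i8") == "@.lib.sra.i8");
  std::cout << frameLabel("foo") << "\n" << libcallName("sra.i8") << "\n";
  return 0;
}
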
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.cpp b/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.cpp
deleted file mode 100644
index 7a948de..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ /dev/null
@@ -1,490 +0,0 @@
-
-//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the helper functions for representing debug information.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16DebugInfo.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-using namespace llvm;
-
-/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty.
-///
-void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TagName) {
- if (Ty.isBasicType())
- PopulateBasicTypeInfo (Ty, TypeNo);
- else if (Ty.isCompositeType())
- PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
- else if (Ty.isDerivedType())
- PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
- else {
- TypeNo = PIC16Dbg::T_NULL;
- HasAux = false;
- }
- return;
-}
-
-/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty.
-///
-void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
- std::string Name = Ty.getName();
- unsigned short BaseTy = GetTypeDebugNumber(Name);
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | (0xffff & BaseTy);
-}
-
-/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type
-/// from Ty. Derived types are mostly pointers.
-///
-void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TagName) {
-
- switch(Ty.getTag())
- {
- case dwarf::DW_TAG_pointer_type:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_PTR;
- break;
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
-
- // We also need to encode the information about the base type of
- // pointer in TypeNo.
- DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
-}
-
-/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty.
-void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TagName) {
-
- DICompositeType CTy = DICompositeType(Ty);
- DIArray Elements = CTy.getTypeArray();
- unsigned short size = 1;
- unsigned short Dimension[4]={0,0,0,0};
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element);
- Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
- // Each dimension is represented by 2 bytes starting at byte 9.
- Aux[8+i*2+0] = Dimension[i];
- Aux[8+i*2+1] = Dimension[i] >> 8;
- size = size * Dimension[i];
- }
- }
- HasAux = true;
-  // In the auxiliary entry for an array, bytes 7 and 8 represent the array size.
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- DIType BaseType = CTy.getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
-}
-
-/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[] , TagName for
-/// structure or union.
-///
-void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
- unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty);
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- if (Ty.getTag() == dwarf::DW_TAG_structure_type)
- TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
- else
- TypeNo = TypeNo | PIC16Dbg::T_UNION;
- TagName = CTy.getName();
- // UniqueSuffix is .number where number is obtained from
- // llvm.dbg.composite<number>.
- // FIXME: This will break when composite type is not represented by
- // llvm.dbg.composite* global variable. Since we need to revisit
-  // PIC16DebugInfo implementation anyway after the MDNodes based
- // framework is done, let us continue with the way it is.
- std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
- TagName += UniqueSuffix;
- unsigned short size = CTy.getSizeInBits()/8;
- // 7th and 8th byte represent size.
- HasAux = true;
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
-}
-
-/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
-void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | PIC16Dbg::T_ENUM;
-}
-
-/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for
-/// composite types from Ty.
-///
-void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TagName) {
- switch (Ty.getTag()) {
- case dwarf::DW_TAG_array_type: {
- PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
- break;
- }
- case dwarf:: DW_TAG_union_type:
- case dwarf::DW_TAG_structure_type: {
- PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
- break;
- }
- case dwarf::DW_TAG_enumeration_type: {
- PopulateEnumTypeInfo (Ty, TypeNo);
- break;
- }
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
-}
-
-/// GetTypeDebugNumber - Get debug type number for given type.
-///
-unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
- if (type == "char")
- return PIC16Dbg::T_CHAR;
- else if (type == "short")
- return PIC16Dbg::T_SHORT;
- else if (type == "int")
- return PIC16Dbg::T_INT;
- else if (type == "long")
- return PIC16Dbg::T_LONG;
- else if (type == "unsigned char")
- return PIC16Dbg::T_UCHAR;
- else if (type == "unsigned short")
- return PIC16Dbg::T_USHORT;
- else if (type == "unsigned int")
- return PIC16Dbg::T_UINT;
- else if (type == "unsigned long")
- return PIC16Dbg::T_ULONG;
- else
- return 0;
-}
-
-/// GetStorageClass - Get storage class for give debug variable.
-///
-short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
- short ClassNo;
- if (PAN::isLocalName(DIGV.getName())) {
-    // Generating C_AUTO here fails due to an error in the linker. Change it
-    // once the linker is fixed.
- ClassNo = PIC16Dbg::C_STAT;
- }
- else if (DIGV.isLocalToUnit())
- ClassNo = PIC16Dbg::C_STAT;
- else
- ClassNo = PIC16Dbg::C_EXT;
- return ClassNo;
-}
-
-/// BeginModule - Emit necessary debug info to start a Module and do other
-/// required initializations.
-void PIC16DbgInfo::BeginModule(Module &M) {
- // Emit file directive for module.
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(M);
- if (DbgFinder.compile_unit_count() != 0) {
-  // FIXME: What if more than one CU is present in a module?
- MDNode *CU = *DbgFinder.compile_unit_begin();
- EmitDebugDirectives = true;
- SwitchToCU(CU);
- }
- // Emit debug info for decls of composite types.
- EmitCompositeTypeDecls(M);
-}
-
-/// Helper to find first valid debug loc for a function.
-///
-static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
- DebugLoc DL;
- for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
- II != E; ++II) {
- DL = II->getDebugLoc();
- if (!DL.isUnknown())
- return DL;
- }
- }
- return DL;
-}
-
-/// BeginFunction - Emit necessary debug info to start a function.
-///
-void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
- if (! EmitDebugDirectives) return;
-
-  // Retrieve the first valid debug Loc and process it.
- const DebugLoc &DL = GetDebugLocForFunction(MF);
- // Emit debug info only if valid debug info is available.
- if (!DL.isUnknown()) {
- ChangeDebugLoc(MF, DL, true);
- EmitFunctBeginDI(MF.getFunction());
- }
-  // Set the current line to 0 so that the .line directive is generated after .bf.
- CurLine = 0;
-}
-
-/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
-/// CurFile and CurLine may change as a result of this.
-///
-void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,
- const DebugLoc &DL, bool IsInBeginFunction) {
- if (!EmitDebugDirectives) return;
- assert(!DL.isUnknown() && "can't change to invalid debug loc");
-
- SwitchToCU(DL.getScope(MF.getFunction()->getContext()));
- SwitchToLine(DL.getLine(), IsInBeginFunction);
-}
-
-/// SwitchToLine - Emit line directive for a new line.
-///
-void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
- if (CurLine == Line) return;
- if (!IsInBeginFunction)
- OS.EmitRawText("\n\t.line " + Twine(Line));
- CurLine = Line;
-}
-
-/// EndFunction - Emit .ef for end of function.
-///
-void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
- if (! EmitDebugDirectives) return;
- const DebugLoc &DL = GetDebugLocForFunction(MF);
- // Emit debug info only if valid debug info is available.
- if (!DL.isUnknown())
- EmitFunctEndDI(MF.getFunction(), CurLine);
-}
-
-/// EndModule - Emit .eof for end of module.
-///
-void PIC16DbgInfo::EndModule(Module &M) {
- if (! EmitDebugDirectives) return;
- EmitVarDebugInfo(M);
- if (CurFile != "") OS.EmitRawText(StringRef("\n\t.eof"));
-}
-
-/// EmitCompositeTypeElements - Emit debug information for members of a
-/// composite type.
-///
-void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
- std::string SuffixNo) {
- unsigned long Value = 0;
- DIArray Elements = CTy.getTypeArray();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
- DIDescriptor Element = Elements.getElement(i);
- unsigned short TypeNo = 0;
- bool HasAux = false;
- int ElementAux[PIC16Dbg::AuxSize] = { 0 };
- std::string TagName = "";
- DIDerivedType DITy(Element);
- unsigned short ElementSize = DITy.getSizeInBits()/8;
-    // Get the mangled name for this structure/union element.
- std::string MangMemName = DITy.getName().str() + SuffixNo;
- PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
- short Class = 0;
- if( CTy.getTag() == dwarf::DW_TAG_union_type)
- Class = PIC16Dbg::C_MOU;
- else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Class = PIC16Dbg::C_MOS;
- EmitSymbol(MangMemName.c_str(), Class, TypeNo, Value);
- if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Value += ElementSize;
- if (HasAux)
- EmitAuxEntry(MangMemName.c_str(), ElementAux, PIC16Dbg::AuxSize, TagName);
- }
-}
-
-/// EmitCompositeTypeDecls - Emit composite type declarations like structure
-/// and union declarations.
-///
-void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(M);
- for (DebugInfoFinder::iterator I = DbgFinder.type_begin(),
- E = DbgFinder.type_end(); I != E; ++I) {
- DICompositeType CTy(*I);
- if (!CTy.Verify())
- continue;
- if (CTy.getTag() == dwarf::DW_TAG_union_type ||
- CTy.getTag() == dwarf::DW_TAG_structure_type ) {
- // Get the number after llvm.dbg.composite and make UniqueSuffix from
- // it.
- std::string DIVar = CTy->getNameStr();
- std::string UniqueSuffix = "." + DIVar.substr(18);
- std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
- unsigned short size = CTy.getSizeInBits()/8;
- int Aux[PIC16Dbg::AuxSize] = {0};
- // 7th and 8th byte represent size of structure/union.
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- // Emit .def for structure/union tag.
- if( CTy.getTag() == dwarf::DW_TAG_union_type)
- EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_UNTAG);
- else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_STRTAG);
-
- // Emit auxiliary debug information for structure/union tag.
- EmitAuxEntry(MangledCTyName.c_str(), Aux, PIC16Dbg::AuxSize);
-
- // Emit members.
- EmitCompositeTypeElements (CTy, UniqueSuffix);
-
- // Emit mangled Symbol for end of structure/union.
- std::string EOSSymbol = ".eos" + UniqueSuffix;
- EmitSymbol(EOSSymbol.c_str(), PIC16Dbg::C_EOS);
- EmitAuxEntry(EOSSymbol.c_str(), Aux, PIC16Dbg::AuxSize,
- MangledCTyName.c_str());
- }
- }
-}
-
-
-/// EmitFunctBeginDI - Emit .bf for function.
-///
-void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
- std::string FunctName = F->getName();
- if (EmitDebugDirectives) {
- std::string FunctBeginSym = ".bf." + FunctName;
- std::string BlockBeginSym = ".bb." + FunctName;
-
- int BFAux[PIC16Dbg::AuxSize] = {0};
- BFAux[4] = CurLine;
- BFAux[5] = CurLine >> 8;
-
- // Emit debug directives for beginning of function.
- EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
- EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
-
- EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
- EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
- }
-}
-
-/// EmitFunctEndDI - Emit .ef for function end.
-///
-void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
- std::string FunctName = F->getName();
- if (EmitDebugDirectives) {
- std::string FunctEndSym = ".ef." + FunctName;
- std::string BlockEndSym = ".eb." + FunctName;
-
- // Emit debug directives for end of function.
- EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
- int EFAux[PIC16Dbg::AuxSize] = {0};
- // 5th and 6th byte stand for line number.
- EFAux[4] = CurLine;
- EFAux[5] = CurLine >> 8;
- EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
- EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
- EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
- }
-}
-
-/// EmitAuxEntry - Emit Auxiliary debug information.
-///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
- std::string TagName) {
- std::string Tmp;
- // TagName is emitted in case of structure/union objects.
- if (!TagName.empty()) Tmp += ", " + TagName;
-
- for (int i = 0; i<Num; i++)
- Tmp += "," + utostr(Aux[i] & 0xff);
-
- OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp);
-}
-
-/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
-///
-void PIC16DbgInfo::EmitSymbol(std::string Name, short Class,
- unsigned short Type, unsigned long Value) {
- std::string Tmp;
- if (Value > 0)
- Tmp = ", value = " + utostr(Value);
-
- OS.EmitRawText("\n\t.def " + Twine(Name) + ", type = " + utostr(Type) +
- ", class = " + utostr(Class) + Tmp);
-}
-
-/// EmitVarDebugInfo - Emit debug information for all variables.
-///
-void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
- DebugInfoFinder DbgFinder;
- DbgFinder.processModule(M);
-
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I) {
- DIGlobalVariable DIGV(*I);
- DIType Ty = DIGV.getType();
- unsigned short TypeNo = 0;
- bool HasAux = false;
- int Aux[PIC16Dbg::AuxSize] = { 0 };
- std::string TagName = "";
- std::string VarName = DIGV.getName();
- VarName = MAI->getGlobalPrefix() + VarName;
- PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
-    // Emit debug info only if type information is available.
- if (TypeNo != PIC16Dbg::T_NULL) {
- OS.EmitRawText("\t.type " + Twine(VarName) + ", " + Twine(TypeNo));
- short ClassNo = getStorageClass(DIGV);
- OS.EmitRawText("\t.class " + Twine(VarName) + ", " + Twine(ClassNo));
- if (HasAux)
- EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
- }
- }
-}
-
-/// SwitchToCU - Switch to a new compilation unit.
-///
-void PIC16DbgInfo::SwitchToCU(MDNode *CU) {
- // Get the file path from CU.
- DICompileUnit cu(CU);
- std::string DirName = cu.getDirectory();
- std::string FileName = cu.getFilename();
- std::string FilePath = DirName + "/" + FileName;
-
- // Nothing to do if source file is still same.
- if ( FilePath == CurFile ) return;
-
- // Else, close the current one and start a new.
- if (CurFile != "")
- OS.EmitRawText(StringRef("\t.eof"));
- OS.EmitRawText("\n\t.file\t\"" + Twine(FilePath) + "\"");
- CurFile = FilePath;
- CurLine = 0;
-}
-
-/// EmitEOF - Emit .eof for end of file.
-///
-void PIC16DbgInfo::EmitEOF() {
- if (CurFile != "")
- OS.EmitRawText(StringRef("\t.EOF"));
-}
-
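One detail of the deleted debug-info emitter that is easy to miss is the byte layout of the COFF-style auxiliary entry: PopulateArrayTypeInfo and PopulateStructOrUnionTypeInfo place the aggregate size in bytes 7-8 (Aux[6]/Aux[7], low byte first) and each array dimension in two bytes starting at byte 9 (Aux[8 + 2*i]). A small standalone sketch of that packing, assuming the 20-byte AuxSize from PIC16DebugInfo.h below:

#include <cstdio>

enum { AuxSize = 20 }; // PIC16Dbg::AuxSize in the deleted header

// Pack array dimensions and the resulting element count the way
// PopulateArrayTypeInfo did: dimensions at bytes 9.., size at bytes 7-8.
static void packArrayAux(int Aux[AuxSize], const unsigned short Dims[],
                         unsigned NumDims) {
  unsigned short Size = 1;
  for (unsigned i = 0; i < NumDims && i < 4; ++i) { // at most 4 dimensions
    Aux[8 + i * 2 + 0] = Dims[i];       // dimension, low byte (masked on emission)
    Aux[8 + i * 2 + 1] = Dims[i] >> 8;  // dimension, high byte
    Size = (unsigned short)(Size * Dims[i]);
  }
  Aux[6] = Size & 0xff; // total size, low byte
  Aux[7] = Size >> 8;   // total size, high byte
}

int main() {
  int Aux[AuxSize] = {0};
  const unsigned short Dims[2] = {10, 20}; // e.g. a 10x20 array
  packArrayAux(Aux, Dims, 2);
  // 10*20 = 200 = 0x00C8, so Aux[6]=200, Aux[7]=0, dims at Aux[8..11].
  std::printf("size: %d %d  dim0: %d %d  dim1: %d %d\n",
              Aux[6], Aux[7], Aux[8], Aux[9], Aux[10], Aux[11]);
  return 0;
}
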
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.h
deleted file mode 100644
index 031dcf0..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16DebugInfo.h
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- PIC16DebugInfo.h - Interfaces for PIC16 Debug Information ============//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the helper functions for representing debug information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16DBG_H
-#define PIC16DBG_H
-
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Module.h"
-
-namespace llvm {
- class MachineFunction;
- class DebugLoc;
- class MCStreamer;
-
- namespace PIC16Dbg {
- enum VarType {
- T_NULL,
- T_VOID,
- T_CHAR,
- T_SHORT,
- T_INT,
- T_LONG,
- T_FLOAT,
- T_DOUBLE,
- T_STRUCT,
- T_UNION,
- T_ENUM,
- T_MOE,
- T_UCHAR,
- T_USHORT,
- T_UINT,
- T_ULONG
- };
- enum DerivedType {
- DT_NONE,
- DT_PTR,
- DT_FCN,
- DT_ARY
- };
- enum TypeSize {
- S_BASIC = 5,
- S_DERIVED = 3
- };
- enum DbgClass {
- C_NULL,
- C_AUTO,
- C_EXT,
- C_STAT,
- C_REG,
- C_EXTDEF,
- C_LABEL,
- C_ULABEL,
- C_MOS,
- C_ARG,
- C_STRTAG,
- C_MOU,
- C_UNTAG,
- C_TPDEF,
- C_USTATIC,
- C_ENTAG,
- C_MOE,
- C_REGPARM,
- C_FIELD,
- C_AUTOARG,
- C_LASTENT,
- C_BLOCK = 100,
- C_FCN,
- C_EOS,
- C_FILE,
- C_LINE,
- C_ALIAS,
- C_HIDDEN,
- C_EOF,
- C_LIST,
- C_SECTION,
- C_EFCN = 255
- };
- enum SymbolSize {
- AuxSize =20
- };
- }
-
- class PIC16DbgInfo {
- MCStreamer &OS;
- const MCAsmInfo *MAI;
- std::string CurFile;
- unsigned CurLine;
-
- // EmitDebugDirectives is set if debug information is available. Default
- // value for it is false.
- bool EmitDebugDirectives;
-
- public:
- PIC16DbgInfo(MCStreamer &os, const MCAsmInfo *T) : OS(os), MAI(T) {
- CurFile = "";
- CurLine = 0;
- EmitDebugDirectives = false;
- }
-
- void BeginModule (Module &M);
- void BeginFunction (const MachineFunction &MF);
- void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL,
- bool IsInBeginFunction = false);
- void EndFunction (const MachineFunction &MF);
- void EndModule (Module &M);
-
-
- private:
- void SwitchToCU (MDNode *CU);
- void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
-
- void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
- int Aux[], std::string &TypeName);
- void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo);
- void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName);
-
- void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName);
- void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName);
-
- void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName);
- void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo);
-
- unsigned GetTypeDebugNumber(std::string &Type);
- short getStorageClass(DIGlobalVariable DIGV);
- void EmitFunctBeginDI(const Function *F);
- void EmitCompositeTypeDecls(Module &M);
- void EmitCompositeTypeElements (DICompositeType CTy, std::string Suffix);
- void EmitFunctEndDI(const Function *F, unsigned Line);
- void EmitAuxEntry(const std::string VarName, int Aux[],
- int num = PIC16Dbg::AuxSize, std::string TagName = "");
- inline void EmitSymbol(std::string Name, short Class,
- unsigned short Type = PIC16Dbg::T_NULL,
- unsigned long Value = 0);
- void EmitVarDebugInfo(Module &M);
- void EmitEOF();
- };
-} // end namespace llvm;
-#endif
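The TypeSize constants above (S_BASIC = 5, S_DERIVED = 3) drive the TypeNo packing in the deleted PIC16DebugInfo.cpp: every derived level shifts the accumulator left by 3 bits and ORs in a DerivedType code, and the innermost basic type shifts by 5 bits and ORs in a VarType code. A worked sketch for "pointer to int", using the values the enum declaration order implies (DT_PTR = 1, T_INT = 4):

#include <cassert>
#include <cstdio>

// Values implied by the declaration order of the deleted enums.
enum { T_INT = 4 };                   // T_NULL, T_VOID, T_CHAR, T_SHORT, T_INT, ...
enum { DT_PTR = 1 };                  // DT_NONE, DT_PTR, ...
enum { S_BASIC = 5, S_DERIVED = 3 };  // PIC16Dbg::TypeSize

int main() {
  unsigned short TypeNo = 0;

  // PopulateDerivedTypeInfo step for a DW_TAG_pointer_type:
  TypeNo = TypeNo << S_DERIVED;
  TypeNo = TypeNo | DT_PTR;

  // Recursive PopulateBasicTypeInfo step for the pointee type "int":
  TypeNo = TypeNo << S_BASIC;
  TypeNo = TypeNo | (0xffff & T_INT);

  // (DT_PTR << S_BASIC) | T_INT == (1 << 5) | 4 == 36 for "int *".
  assert(TypeNo == 36);
  std::printf("TypeNo for 'int *' = %u\n", TypeNo);
  return 0;
}
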
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
deleted file mode 100644
index 6cbd002..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-isel"
-
-#include "llvm/Support/ErrorHandling.h"
-#include "PIC16ISelDAGToDAG.h"
-using namespace llvm;
-
-/// createPIC16ISelDag - This pass converts a legalized DAG into a
-/// PIC16-specific DAG, ready for instruction scheduling.
-FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
- return new PIC16DAGToDAGISel(TM);
-}
-
-
-/// Select - Select instructions not customized! Used for
-/// expanded, promoted and normal instructions.
-SDNode* PIC16DAGToDAGISel::Select(SDNode *N) {
-
- // Select the default instruction.
- SDNode *ResNode = SelectCode(N);
-
- return ResNode;
-}
-
-
-// SelectDirectAddr - Match a direct address for DAG.
-// A direct address could be a globaladdress or externalsymbol.
-bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N,
- SDValue &Address) {
- // Return true if TGA or ES.
- if (N.getOpcode() == ISD::TargetGlobalAddress
- || N.getOpcode() == ISD::TargetExternalSymbol) {
- Address = N;
- return true;
- }
-
- return false;
-}
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.h
deleted file mode 100644
index ecaddd3..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- PIC16ISelDAGToDAG.h - A dag to dag inst selector for PIC16 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-isel"
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Intrinsics.h"
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to PIC16TargetMachine.
- const PIC16TargetMachine &TM;
-
-  /// PIC16Lowering - This object fully describes how to lower LLVM code to a
- /// PIC16-specific SelectionDAG.
- const PIC16TargetLowering &PIC16Lowering;
-
-public:
- explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "PIC16 DAG->DAG Pattern Instruction Selection";
- }
-
-private:
- // Include the pieces autogenerated from the target description.
-#include "PIC16GenDAGISel.inc"
-
- SDNode *Select(SDNode *N);
-
- // Match direct address complex pattern.
- bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address);
-
-};
-
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.cpp b/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.cpp
deleted file mode 100644
index 527b31d..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ /dev/null
@@ -1,2000 +0,0 @@
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PIC16 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-lower"
-#include "PIC16ABINames.h"
-#include "PIC16ISelLowering.h"
-#include "PIC16TargetObjectFile.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16MachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Function.h"
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/ErrorHandling.h"
-
-
-using namespace llvm;
-
-static const char *getIntrinsicName(unsigned opcode) {
- std::string Basename;
- switch(opcode) {
- default: llvm_unreachable("do not know intrinsic name");
- // Arithmetic Right shift for integer types.
- case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
- case RTLIB::SRA_I16: Basename = "sra.i16"; break;
- case RTLIB::SRA_I32: Basename = "sra.i32"; break;
-
- // Left shift for integer types.
- case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
- case RTLIB::SHL_I16: Basename = "sll.i16"; break;
- case RTLIB::SHL_I32: Basename = "sll.i32"; break;
-
- // Logical Right Shift for integer types.
- case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
- case RTLIB::SRL_I16: Basename = "srl.i16"; break;
- case RTLIB::SRL_I32: Basename = "srl.i32"; break;
-
- // Multiply for integer types.
- case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
- case RTLIB::MUL_I16: Basename = "mul.i16"; break;
- case RTLIB::MUL_I32: Basename = "mul.i32"; break;
-
- // Signed division for integers.
- case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
- case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
-
- // Unsigned division for integers.
- case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
- case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
-
-    // Signed modulus for integers.
- case RTLIB::SREM_I16: Basename = "srem.i16"; break;
- case RTLIB::SREM_I32: Basename = "srem.i32"; break;
-
-    // Unsigned modulus for integers.
- case RTLIB::UREM_I16: Basename = "urem.i16"; break;
- case RTLIB::UREM_I32: Basename = "urem.i32"; break;
-
- //////////////////////
- // LIBCALLS FOR FLOATS
- //////////////////////
-
- // Float to signed integrals
- case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
- case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
- case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
-
- // Signed integrals to float. char and int are first sign extended to i32
- // before being converted to float, so an I8_F32 or I16_F32 isn't required.
- case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
-
- // Float to Unsigned conversions.
- // Signed conversion can be used for unsigned conversion as well.
- // In signed and unsigned versions only the interpretation of the
- // MSB is different. Bit representation remains the same.
- case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
- case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
- case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
-
- // Unsigned to Float conversions. char and int are first zero extended
- // before being converted to float.
- case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
-
- // Floating point add, sub, mul, div.
- case RTLIB::ADD_F32: Basename = "add.f32"; break;
- case RTLIB::SUB_F32: Basename = "sub.f32"; break;
- case RTLIB::MUL_F32: Basename = "mul.f32"; break;
- case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
- // Floating point comparison
- case RTLIB::O_F32: Basename = "unordered.f32"; break;
- case RTLIB::UO_F32: Basename = "unordered.f32"; break;
- case RTLIB::OLE_F32: Basename = "le.f32"; break;
- case RTLIB::OGE_F32: Basename = "ge.f32"; break;
- case RTLIB::OLT_F32: Basename = "lt.f32"; break;
- case RTLIB::OGT_F32: Basename = "gt.f32"; break;
- case RTLIB::OEQ_F32: Basename = "eq.f32"; break;
- case RTLIB::UNE_F32: Basename = "neq.f32"; break;
- }
-
- std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
- std::string tagname = PAN::getTagName(PAN::LIBCALL);
- std::string Fullname = prefix + tagname + Basename;
-
- // The name has to live through program life.
- return ESNames::createESName(Fullname);
-}
-
-// getStdLibCallName - Get the name for the standard library function.
-static const char *getStdLibCallName(unsigned opcode) {
- std::string BaseName;
- switch(opcode) {
- case RTLIB::COS_F32: BaseName = "cos";
- break;
- case RTLIB::SIN_F32: BaseName = "sin";
- break;
- case RTLIB::MEMCPY: BaseName = "memcpy";
- break;
- case RTLIB::MEMSET: BaseName = "memset";
- break;
- case RTLIB::MEMMOVE: BaseName = "memmove";
- break;
- default: llvm_unreachable("do not know std lib call name");
- }
- std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
- std::string LibCallName = prefix + BaseName;
-
- // The name has to live through program life.
- return ESNames::createESName(LibCallName);
-}
-
-// PIC16TargetLowering Constructor.
-PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
- : TargetLowering(TM, new PIC16TargetObjectFile()) {
-
- Subtarget = &TM.getSubtarget<PIC16Subtarget>();
-
- addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
-
- setShiftAmountType(MVT::i8);
-
- // Std lib call names
- setLibcallName(RTLIB::COS_F32, getStdLibCallName(RTLIB::COS_F32));
- setLibcallName(RTLIB::SIN_F32, getStdLibCallName(RTLIB::SIN_F32));
- setLibcallName(RTLIB::MEMCPY, getStdLibCallName(RTLIB::MEMCPY));
- setLibcallName(RTLIB::MEMSET, getStdLibCallName(RTLIB::MEMSET));
- setLibcallName(RTLIB::MEMMOVE, getStdLibCallName(RTLIB::MEMMOVE));
-
- // SRA library call names
- setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8));
- setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16));
- setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32));
-
- // SHL library call names
- setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8));
- setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16));
- setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32));
-
- // SRL library call names
- setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8));
- setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16));
- setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32));
-
- // MUL Library call names
- setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8));
- setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
- setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
-
- // Signed division lib call names
- setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
- setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
-
- // Unsigned division lib call names
- setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
- setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
-
- // Signed remainder lib call names
- setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
- setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
-
- // Unsigned remainder lib call names
- setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
- setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
-
- // Floating point to signed int conversions.
- setLibcallName(RTLIB::FPTOSINT_F32_I8,
- getIntrinsicName(RTLIB::FPTOSINT_F32_I8));
- setLibcallName(RTLIB::FPTOSINT_F32_I16,
- getIntrinsicName(RTLIB::FPTOSINT_F32_I16));
- setLibcallName(RTLIB::FPTOSINT_F32_I32,
- getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
-
- // Signed int to floats.
- setLibcallName(RTLIB::SINTTOFP_I32_F32,
- getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
-
- // Floating points to unsigned ints.
- setLibcallName(RTLIB::FPTOUINT_F32_I8,
- getIntrinsicName(RTLIB::FPTOUINT_F32_I8));
- setLibcallName(RTLIB::FPTOUINT_F32_I16,
- getIntrinsicName(RTLIB::FPTOUINT_F32_I16));
- setLibcallName(RTLIB::FPTOUINT_F32_I32,
- getIntrinsicName(RTLIB::FPTOUINT_F32_I32));
-
- // Unsigned int to floats.
- setLibcallName(RTLIB::UINTTOFP_I32_F32,
- getIntrinsicName(RTLIB::UINTTOFP_I32_F32));
-
-  // Floating point add, sub, mul, div.
- setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
- setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
- setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
- setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
-
-  // Floating point comparison
- setLibcallName(RTLIB::O_F32, getIntrinsicName(RTLIB::O_F32));
- setLibcallName(RTLIB::UO_F32, getIntrinsicName(RTLIB::UO_F32));
- setLibcallName(RTLIB::OLE_F32, getIntrinsicName(RTLIB::OLE_F32));
- setLibcallName(RTLIB::OGE_F32, getIntrinsicName(RTLIB::OGE_F32));
- setLibcallName(RTLIB::OLT_F32, getIntrinsicName(RTLIB::OLT_F32));
- setLibcallName(RTLIB::OGT_F32, getIntrinsicName(RTLIB::OGT_F32));
- setLibcallName(RTLIB::OEQ_F32, getIntrinsicName(RTLIB::OEQ_F32));
- setLibcallName(RTLIB::UNE_F32, getIntrinsicName(RTLIB::UNE_F32));
-
- // Return value comparisons of floating point calls.
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
-
- setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
- setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
-
- setOperationAction(ISD::LOAD, MVT::i8, Legal);
- setOperationAction(ISD::LOAD, MVT::i16, Custom);
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
-
- setOperationAction(ISD::STORE, MVT::i8, Legal);
- setOperationAction(ISD::STORE, MVT::i16, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i64, Custom);
-
- setOperationAction(ISD::ADDE, MVT::i8, Custom);
- setOperationAction(ISD::ADDC, MVT::i8, Custom);
- setOperationAction(ISD::SUBE, MVT::i8, Custom);
- setOperationAction(ISD::SUBC, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i16, Custom);
-
- setOperationAction(ISD::OR, MVT::i8, Custom);
- setOperationAction(ISD::AND, MVT::i8, Custom);
- setOperationAction(ISD::XOR, MVT::i8, Custom);
-
- setOperationAction(ISD::FrameIndex, MVT::i16, Custom);
-
- setOperationAction(ISD::MUL, MVT::i8, Custom);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
-
- setOperationAction(ISD::SRA, MVT::i8, Custom);
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
-
- setOperationAction(ISD::ROTL, MVT::i8, Expand);
- setOperationAction(ISD::ROTR, MVT::i8, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // PIC16 does not support shift parts
- setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
-
-
- // PIC16 does not have a SETCC, expand it to SELECT_CC.
- setOperationAction(ISD::SETCC, MVT::i8, Expand);
- setOperationAction(ISD::SELECT, MVT::i8, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
-
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::BR_CC, MVT::i8, Custom);
-
- //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
- setTruncStoreAction(MVT::i16, MVT::i8, Custom);
-
- // Now deduce the information based on the above mentioned
- // actions
- computeRegisterProperties();
-}
-
-std::pair<const TargetRegisterClass*, uint8_t>
-PIC16TargetLowering::findRepresentativeClass(EVT VT) const {
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return TargetLowering::findRepresentativeClass(VT);
- case MVT::i16:
- return std::make_pair(PIC16::FSR16RegisterClass, 1);
- }
-}
-
-// getOutFlag - Extract the flag result if the Op has it.
-static SDValue getOutFlag(SDValue &Op) {
- // Flag is the last value of the node.
- SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1);
-
- assert (Flag.getValueType() == MVT::Flag
- && "Node does not have an out Flag");
-
- return Flag;
-}
-// Get the TmpOffset for FrameIndex
-unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size,
- MachineFunction &MF) const {
- PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
- std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap();
-
- std::map<unsigned, unsigned>::iterator
- MapIt = FiTmpOffsetMap.find(FI);
- if (MapIt != FiTmpOffsetMap.end())
- return MapIt->second;
-
- // This FI (FrameIndex) is not yet mapped, so map it
- FiTmpOffsetMap[FI] = FuncInfo->getTmpSize();
- FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size);
- return FiTmpOffsetMap[FI];
-}
-
-void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
- FuncInfo->getFiTmpOffsetMap().clear();
- FuncInfo->setTmpSize(0);
-}
-
-// Extract the chain value from an SDValue node.
-// This helper keeps the chain-extraction code in one place,
-// so any future change to how chains are returned only
-// needs to be made here.
-static SDValue getChain(SDValue &Op) {
- SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1);
-
-  // If the last value returned is a Flag, then the chain is the
-  // second-to-last value returned.
- if (Chain.getValueType() == MVT::Flag)
- Chain = Op.getValue(Op.getNode()->getNumValues() - 2);
-
-  // Not all nodes produce a chain. Therefore the following assert
-  // verifies that the node is actually returning a chain.
- assert (Chain.getValueType() == MVT::Other
- && "Node does not have a chain");
-
- return Chain;
-}
-
-/// PopulateResults - Helper function to LowerOperation.
-/// If a node wants to return multiple results after lowering,
-/// it stuffs them into an array of SDValue called Results.
-
-static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
- if (N.getOpcode() == ISD::MERGE_VALUES) {
- int NumResults = N.getNumOperands();
- for( int i = 0; i < NumResults; i++)
- Results.push_back(N.getOperand(i));
- }
- else
- Results.push_back(N);
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getSetCCResultType(EVT ValType) const {
- return MVT::i8;
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getCmpLibcallReturnType() const {
- return MVT::i8;
-}
-
-/// The type legalizer framework can generate libcalls only when the
-/// operand/result types are illegal.
-/// PIC16 needs to generate libcalls even for legal types (i8) for some ops,
-/// for example an arithmetic right shift. These functions are used to lower
-/// such operations, which need a libcall even though their types are legal.
-
-void
-PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
- const char *Name) {
- PIC16LibcallNames[Call] = Name;
-}
-
-const char *
-PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const {
- return PIC16LibcallNames[Call];
-}
-
-SDValue
-PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
- EVT RetVT, const SDValue *Ops,
- unsigned NumOps, bool isSigned,
- SelectionDAG &DAG, DebugLoc dl) const {
-
- TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
-
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
-
- SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i16);
-
- const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- std::pair<SDValue,SDValue> CallInfo =
- LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
-
- return CallInfo.first;
-}
-
-const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return NULL;
- case PIC16ISD::Lo: return "PIC16ISD::Lo";
- case PIC16ISD::Hi: return "PIC16ISD::Hi";
- case PIC16ISD::MTLO: return "PIC16ISD::MTLO";
- case PIC16ISD::MTHI: return "PIC16ISD::MTHI";
- case PIC16ISD::MTPCLATH: return "PIC16ISD::MTPCLATH";
- case PIC16ISD::PIC16Connect: return "PIC16ISD::PIC16Connect";
- case PIC16ISD::Banksel: return "PIC16ISD::Banksel";
- case PIC16ISD::PIC16Load: return "PIC16ISD::PIC16Load";
- case PIC16ISD::PIC16LdArg: return "PIC16ISD::PIC16LdArg";
- case PIC16ISD::PIC16LdWF: return "PIC16ISD::PIC16LdWF";
- case PIC16ISD::PIC16Store: return "PIC16ISD::PIC16Store";
- case PIC16ISD::PIC16StWF: return "PIC16ISD::PIC16StWF";
- case PIC16ISD::BCF: return "PIC16ISD::BCF";
- case PIC16ISD::LSLF: return "PIC16ISD::LSLF";
- case PIC16ISD::LRLF: return "PIC16ISD::LRLF";
- case PIC16ISD::RLF: return "PIC16ISD::RLF";
- case PIC16ISD::RRF: return "PIC16ISD::RRF";
- case PIC16ISD::CALL: return "PIC16ISD::CALL";
- case PIC16ISD::CALLW: return "PIC16ISD::CALLW";
- case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC";
- case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC";
- case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND";
- case PIC16ISD::RET: return "PIC16ISD::RET";
- case PIC16ISD::Dummy: return "PIC16ISD::Dummy";
- }
-}
-
-void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const {
-
- switch (N->getOpcode()) {
- case ISD::GlobalAddress:
- Results.push_back(ExpandGlobalAddress(N, DAG));
- return;
- case ISD::ExternalSymbol:
- Results.push_back(ExpandExternalSymbol(N, DAG));
- return;
- case ISD::STORE:
- Results.push_back(ExpandStore(N, DAG));
- return;
- case ISD::LOAD:
- PopulateResults(ExpandLoad(N, DAG), Results);
- return;
- case ISD::ADD:
- // Results.push_back(ExpandAdd(N, DAG));
- return;
- case ISD::FrameIndex:
- Results.push_back(ExpandFrameIndex(N, DAG));
- return;
- default:
- assert (0 && "not implemented");
- return;
- }
-}
-
-SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N,
- SelectionDAG &DAG) const {
-
-  // Currently handling only a FrameIndex of type MVT::i16.
-  // One example of this scenario is when the return value is written to
-  // FrameIndex #0.
-
- if (N->getValueType(0) != MVT::i16)
- return SDValue();
-
- // Expand the FrameIndex into ExternalSymbol and a Constant node
- // The constant will represent the frame index number
- // Get the current function frame
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *Func = MF.getFunction();
- const std::string Name = Func->getName();
-
- FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
- // FIXME there isn't really debug info here
- DebugLoc dl = FR->getDebugLoc();
-
- // Expand FrameIndex like GlobalAddress and ExternalSymbol
- // Also use Offset field for lo and hi parts. The default
- // offset is zero.
-
- SDValue ES;
- int FrameOffset;
- SDValue FI = SDValue(N,0);
- LegalizeFrameIndex(FI, DAG, ES, FrameOffset);
- SDValue Offset = DAG.getConstant(FrameOffset, MVT::i8);
- SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, ES, Offset);
- SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, ES, Offset);
- return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
-}
-
-
-SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const {
- StoreSDNode *St = cast<StoreSDNode>(N);
- SDValue Chain = St->getChain();
- SDValue Src = St->getValue();
- SDValue Ptr = St->getBasePtr();
- EVT ValueType = Src.getValueType();
- unsigned StoreOffset = 0;
- DebugLoc dl = N->getDebugLoc();
-
- SDValue PtrLo, PtrHi;
- LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl);
-
- if (ValueType == MVT::i8) {
- return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src,
- PtrLo, PtrHi,
- DAG.getConstant (0 + StoreOffset, MVT::i8));
- }
- else if (ValueType == MVT::i16) {
- // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
- SDValue SrcLo, SrcHi;
- GetExpandedParts(Src, DAG, SrcLo, SrcHi);
- SDValue ChainLo = Chain, ChainHi = Chain;
- // FIXME: This makes unsafe assumptions. The Chain may be a TokenFactor
- // created for an unrelated purpose, in which case it may not have
- // exactly two operands. Also, even if it does have two operands, they
- // may not be the low and high parts of an aligned load that was split.
- if (Chain.getOpcode() == ISD::TokenFactor) {
- ChainLo = Chain.getOperand(0);
- ChainHi = Chain.getOperand(1);
- }
- SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
- ChainLo,
- SrcLo, PtrLo, PtrHi,
- DAG.getConstant (0 + StoreOffset, MVT::i8));
-
- SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi,
- SrcHi, PtrLo, PtrHi,
- DAG.getConstant (1 + StoreOffset, MVT::i8));
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1),
- getChain(Store2));
- }
- else if (ValueType == MVT::i32) {
- // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
- SDValue SrcLo, SrcHi;
- GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-
- // Get the expanded parts of each of SrcLo and SrcHi.
- SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2;
- GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2);
- GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
-
- SDValue ChainLo = Chain, ChainHi = Chain;
- // FIXME: This makes unsafe assumptions; see the FIXME above.
- if (Chain.getOpcode() == ISD::TokenFactor) {
- ChainLo = Chain.getOperand(0);
- ChainHi = Chain.getOperand(1);
- }
- SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
- ChainHi2 = ChainHi;
- // FIXME: This makes unsafe assumptions; see the FIXME above.
- if (ChainLo.getOpcode() == ISD::TokenFactor) {
- ChainLo1 = ChainLo.getOperand(0);
- ChainLo2 = ChainLo.getOperand(1);
- }
- // FIXME: This makes unsafe assumptions; see the FIXME above.
- if (ChainHi.getOpcode() == ISD::TokenFactor) {
- ChainHi1 = ChainHi.getOperand(0);
- ChainHi2 = ChainHi.getOperand(1);
- }
- SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
- ChainLo1,
- SrcLo1, PtrLo, PtrHi,
- DAG.getConstant (0 + StoreOffset, MVT::i8));
-
- SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2,
- SrcLo2, PtrLo, PtrHi,
- DAG.getConstant (1 + StoreOffset, MVT::i8));
-
- SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1,
- SrcHi1, PtrLo, PtrHi,
- DAG.getConstant (2 + StoreOffset, MVT::i8));
-
- SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2,
- SrcHi2, PtrLo, PtrHi,
- DAG.getConstant (3 + StoreOffset, MVT::i8));
-
- SDValue RetLo = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(Store1), getChain(Store2));
- SDValue RetHi = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(Store3), getChain(Store4));
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
-
- } else if (ValueType == MVT::i64) {
- SDValue SrcLo, SrcHi;
- GetExpandedParts(Src, DAG, SrcLo, SrcHi);
- SDValue ChainLo = Chain, ChainHi = Chain;
- // FIXME: This makes unsafe assumptions; see the FIXME above.
- if (Chain.getOpcode() == ISD::TokenFactor) {
- ChainLo = Chain.getOperand(0);
- ChainHi = Chain.getOperand(1);
- }
- SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL,
- 0 + StoreOffset, false, false, 0);
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
- SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL,
- 1 + StoreOffset, false, false, 0);
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1,
- Store2);
- } else {
- assert (0 && "value type not supported");
- return SDValue();
- }
-}
-
-SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N,
- SelectionDAG &DAG)
- const {
- ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0));
- // FIXME there isn't really debug info here
- DebugLoc dl = ES->getDebugLoc();
-
- SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
- SDValue Offset = DAG.getConstant(0, MVT::i8);
- SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset);
- SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset);
-
- return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-// ExpandGlobalAddress -
-SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
- SelectionDAG &DAG) const {
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0));
- // FIXME there isn't really debug info here
- DebugLoc dl = G->getDebugLoc();
-
- SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(),
- MVT::i8,
- G->getOffset());
-
- SDValue Offset = DAG.getConstant(0, MVT::i8);
- SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset);
- SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset);
-
- return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const {
- assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
-
- if (Op.getOpcode() == ISD::BUILD_PAIR) {
- if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo)
- return true;
- }
- return false;
-}
-
-// Return true if DirectAddress is in ROM_SPACE
-bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const {
-
-  // A ROM address is a GlobalAddress in ROM_SPACE.
-  // If the Op is not a GlobalAddress, return false without checking
-  // anything further.
- if (!isDirectAddress(Op))
- return false;
-
-  // It's a GlobalAddress.
-  // Op is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)).
- SDValue TGA = Op.getOperand(0).getOperand(0);
- GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(TGA);
-
- if (GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
- return true;
-
-  // For any other address space, return false.
- return false;
-}
-
-
-// GetExpandedParts - This function works along the same lines as
-// GetExpandedInteger in the type legalizer. It returns the expanded
-// parts of Op in Lo and Hi.
-
-void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi) const {
- SDNode *N = Op.getNode();
- DebugLoc dl = N->getDebugLoc();
- EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
- // Extract the lo component.
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
- DAG.getConstant(0, MVT::i8));
-
-  // Extract the hi component.
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
- DAG.getConstant(1, MVT::i8));
-}
-
-// Legalize FrameIndex into ExternalSymbol and offset.
-void
-PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
- SDValue &ES, int &Offset) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *Func = MF.getFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
- const std::string Name = Func->getName();
-
- FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
-
-  // FrameIndices are not stack offsets; they represent requests for
-  // space on the stack, and the space requested may be more than one byte.
-  // Therefore, to calculate the stack offset that a FrameIndex corresponds
-  // to, we need to traverse all the FrameIndices that appear earlier in
-  // the list and add up their requested sizes.
- unsigned FIndex = FR->getIndex();
- const char *tmpName;
- if (FIndex < FuncInfo->getReservedFrameCount()) {
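-    // Reserved frame indices are addressed off the function's frame label;
-    // the offset is the sum of the sizes of all earlier frame objects.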
- tmpName = ESNames::createESName(PAN::getFrameLabel(Name));
- ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
- Offset = 0;
- for (unsigned i=0; i<FIndex ; ++i) {
- Offset += MFI->getObjectSize(i);
- }
- } else {
- // FrameIndex has been made for some temporary storage
- tmpName = ESNames::createESName(PAN::getTempdataLabel(Name));
- ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
- Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF);
- }
-
- return;
-}
-
-// This function legalizes PIC16 addresses. If the pointer is
-//  -- a direct address variable
-//     --> then a Banksel for that variable will be created.
-//  -- a ROM variable
-//     --> then it will be treated as an indirect address.
-//  -- an indirect address
-//     --> then the address will be loaded into FSR.
-//  -- an ADD with a constant operand
-//     --> then the constant operand of ADD will be returned as Offset
-//         and the non-constant operand of ADD will be treated as the pointer.
-// Returns the hi and lo parts of the address, and the offset (in case of ADD).
-
-void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
- SDValue &Lo, SDValue &Hi,
- unsigned &Offset, DebugLoc dl) const {
-
- // Offset, by default, should be 0
- Offset = 0;
-
- // If the pointer is ADD with constant,
- // return the constant value as the offset
- if (Ptr.getOpcode() == ISD::ADD) {
- SDValue OperLeft = Ptr.getOperand(0);
- SDValue OperRight = Ptr.getOperand(1);
- if ((OperLeft.getOpcode() == ISD::Constant) &&
- (dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue() < 32 )) {
- Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
- Ptr = OperRight;
- } else if ((OperRight.getOpcode() == ISD::Constant) &&
- (dyn_cast<ConstantSDNode>(OperRight)->getZExtValue() < 32 )){
- Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
- Ptr = OperLeft;
- }
- }
-
-  // If the pointer is of type MVT::i8 and is an external symbol,
-  // then treat it as a direct address.
-  // One example of such a case is storing to and loading
-  // from the function frame during a call.
- if (Ptr.getValueType() == MVT::i8) {
- switch (Ptr.getOpcode()) {
- case ISD::TargetExternalSymbol:
- Lo = Ptr;
- Hi = DAG.getConstant(1, MVT::i8);
- return;
- }
- }
-
- // Expansion of FrameIndex has Lo/Hi parts
- if (isDirectAddress(Ptr)) {
- SDValue TFI = Ptr.getOperand(0).getOperand(0);
- int FrameOffset;
- if (TFI.getOpcode() == ISD::TargetFrameIndex) {
- LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset);
- Hi = DAG.getConstant(1, MVT::i8);
- Offset += FrameOffset;
- return;
- } else if (TFI.getOpcode() == ISD::TargetExternalSymbol) {
- // FrameIndex has already been expanded.
- // Now just make use of its expansion
- Lo = TFI;
- Hi = DAG.getConstant(1, MVT::i8);
- SDValue FOffset = Ptr.getOperand(0).getOperand(1);
- assert (FOffset.getOpcode() == ISD::Constant &&
- "Invalid operand of PIC16ISD::Lo");
- Offset += dyn_cast<ConstantSDNode>(FOffset)->getZExtValue();
- return;
- }
- }
-
- if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) {
- // Direct addressing case for RAM variables. The Hi part is constant
- // and the Lo part is the TGA itself.
- Lo = Ptr.getOperand(0).getOperand(0);
-
-    // For direct addresses Hi is a constant. A value of 1 signifies that
-    // a banksel needs to be generated for it; a value of 0 signifies that
-    // a banksel does not need to be generated for it. Mark it as 1 now
-    // and optimize later.
- Hi = DAG.getConstant(1, MVT::i8);
- return;
- }
-
- // Indirect addresses. Get the hi and lo parts of ptr.
- GetExpandedParts(Ptr, DAG, Lo, Hi);
-
- // Put the hi and lo parts into FSR.
- Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo);
- Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi);
-
- return;
-}
-
-SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const {
- LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0));
- SDValue Chain = LD->getChain();
- SDValue Ptr = LD->getBasePtr();
- DebugLoc dl = LD->getDebugLoc();
-
- SDValue Load, Offset;
- SDVTList Tys;
- EVT VT, NewVT;
- SDValue PtrLo, PtrHi;
- unsigned LoadOffset;
-
- // Legalize direct/indirect addresses. This will give the lo and hi parts
- // of the address and the offset.
- LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl);
-
- // Load from the pointer (direct address or FSR)
- VT = N->getValueType(0);
- unsigned NumLoads = VT.getSizeInBits() / 8;
- std::vector<SDValue> PICLoads;
- unsigned iter;
- EVT MemVT = LD->getMemoryVT();
- if(ISD::isNON_EXTLoad(N)) {
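-    // Non-extending load: emit one byte-wide PIC16Load for every byte of
-    // the value.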
- for (iter=0; iter<NumLoads ; ++iter) {
- // Add the pointer offset if any
- Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
- Tys = DAG.getVTList(MVT::i8, MVT::Other);
- Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
- Offset);
- PICLoads.push_back(Load);
- }
- } else {
-    // If it is an extended load then use PIC16Load for the memory bytes,
-    // and for all extended bytes perform an action based on the type of
-    // extension - i.e. sign-extended load or zero-extended load.
-
-
- // For extended loads this is the memory value type
- // i.e. without any extension
- EVT MemVT = LD->getMemoryVT();
- unsigned MemBytes = MemVT.getSizeInBits() / 8;
-    // If MVT::i1 is extended to MVT::i8 then MemBytes will be zero,
-    // so set it to one.
- if (MemBytes == 0) MemBytes = 1;
-
- unsigned ExtdBytes = VT.getSizeInBits() / 8;
- Offset = DAG.getConstant(LoadOffset, MVT::i8);
-
- Tys = DAG.getVTList(MVT::i8, MVT::Other);
- // For MemBytes generate PIC16Load with proper offset
- for (iter=0; iter < MemBytes; ++iter) {
- // Add the pointer offset if any
- Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
- Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
- Offset);
- PICLoads.push_back(Load);
- }
-
- // For SignExtendedLoad
- if (ISD::isSEXTLoad(N)) {
-      // For all ExtdBytes use the arithmetically right-shifted value of the
-      // highest MemByte.
- SDValue SRA = DAG.getNode(ISD::SRA, dl, MVT::i8, Load,
- DAG.getConstant(7, MVT::i8));
- for (iter=MemBytes; iter<ExtdBytes; ++iter) {
- PICLoads.push_back(SRA);
- }
- } else if (ISD::isZEXTLoad(N) || ISD::isEXTLoad(N)) {
- //} else if (ISD::isZEXTLoad(N)) {
- // ZeroExtendedLoad -- For all ExtdBytes use constant 0
- SDValue ConstZero = DAG.getConstant(0, MVT::i8);
- for (iter=MemBytes; iter<ExtdBytes; ++iter) {
- PICLoads.push_back(ConstZero);
- }
- }
- }
- SDValue BP;
-
- if (VT == MVT::i8) {
- // Operand of Load is illegal -- Load itself is legal
- return PICLoads[0];
- }
- else if (VT == MVT::i16) {
- BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, PICLoads[0], PICLoads[1]);
- if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
- Chain = getChain(PICLoads[0]);
- else
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(PICLoads[0]), getChain(PICLoads[1]));
- } else if (VT == MVT::i32) {
- SDValue BPs[2];
- BPs[0] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
- PICLoads[0], PICLoads[1]);
- BPs[1] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
- PICLoads[2], PICLoads[3]);
- BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, BPs[0], BPs[1]);
- if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
- Chain = getChain(PICLoads[0]);
- else if (MemVT == MVT::i16)
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(PICLoads[0]), getChain(PICLoads[1]));
- else {
- SDValue Chains[2];
- Chains[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(PICLoads[0]), getChain(PICLoads[1]));
- Chains[1] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- getChain(PICLoads[2]), getChain(PICLoads[3]));
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Chains[0], Chains[1]);
- }
- }
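-  // Merge the rebuilt value and the combined chain so the pair replaces the
-  // two results of the original load.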
- Tys = DAG.getVTList(VT, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
-}
-
-SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
- // We should have handled larger operands in type legalizer itself.
- assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
-
- SDNode *N = Op.getNode();
- SDValue Value = N->getOperand(0);
- SDValue Amt = N->getOperand(1);
- PIC16ISD::PIC16Libcall CallCode;
- switch (N->getOpcode()) {
- case ISD::SRA:
- CallCode = PIC16ISD::SRA_I8;
- break;
- case ISD::SHL:
- CallCode = PIC16ISD::SLL_I8;
- break;
- case ISD::SRL:
- CallCode = PIC16ISD::SRL_I8;
- break;
- default:
- assert ( 0 && "This shift is not implemented yet.");
- return SDValue();
- }
- SmallVector<SDValue, 2> Ops(2);
- Ops[0] = Value;
- Ops[1] = Amt;
- SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2,
- true, DAG, N->getDebugLoc());
- return Call;
-}
-
-SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
- // We should have handled larger operands in type legalizer itself.
- assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower");
-
- SDNode *N = Op.getNode();
- SmallVector<SDValue, 2> Ops(2);
- Ops[0] = N->getOperand(0);
- Ops[1] = N->getOperand(1);
- SDValue Call = MakePIC16Libcall(PIC16ISD::MUL_I8, N->getValueType(0),
- &Ops[0], 2, true, DAG, N->getDebugLoc());
- return Call;
-}
-
-void
-PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const {
- SDValue Op = SDValue(N, 0);
- SDValue Res;
- unsigned i;
- switch (Op.getOpcode()) {
- case ISD::LOAD:
- Res = ExpandLoad(Op.getNode(), DAG); break;
- default: {
- // All other operations are handled in LowerOperation.
- Res = LowerOperation(Op, DAG);
- if (Res.getNode())
- Results.push_back(Res);
-
- return;
- }
- }
-
- N = Res.getNode();
- unsigned NumValues = N->getNumValues();
- for (i = 0; i < NumValues ; i++) {
- Results.push_back(SDValue(N, i));
- }
-}
-
-SDValue PIC16TargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- case ISD::ADD:
- case ISD::ADDC:
- case ISD::ADDE:
- return LowerADD(Op, DAG);
- case ISD::SUB:
- case ISD::SUBC:
- case ISD::SUBE:
- return LowerSUB(Op, DAG);
- case ISD::LOAD:
- return ExpandLoad(Op.getNode(), DAG);
- case ISD::STORE:
- return ExpandStore(Op.getNode(), DAG);
- case ISD::MUL:
- return LowerMUL(Op, DAG);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- return LowerShift(Op, DAG);
- case ISD::OR:
- case ISD::AND:
- case ISD::XOR:
- return LowerBinOp(Op, DAG);
- case ISD::BR_CC:
- return LowerBR_CC(Op, DAG);
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG);
- }
- return SDValue();
-}
-
-SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
- SelectionDAG &DAG,
- DebugLoc dl) const {
- assert (Op.getValueType() == MVT::i8
- && "illegal value type to store on stack.");
-
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *Func = MF.getFunction();
- const std::string FuncName = Func->getName();
-
-
- // Put the value on stack.
- // Get a stack slot index and convert to es.
- int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
- const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
- SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
- // Store the value to ES.
- SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other,
- DAG.getEntryNode(),
- Op, ES,
- DAG.getConstant (1, MVT::i8), // Banksel.
- DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
- MVT::i8));
-
- // Load the value from ES.
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
- SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
- ES, DAG.getConstant (1, MVT::i8),
- DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
- MVT::i8));
-
- return Load.getValue(0);
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG) const {
- unsigned NumOps = Outs.size();
-
- // If call has no arguments then do nothing and return.
- if (NumOps == 0)
- return Chain;
-
- std::vector<SDValue> Ops;
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Arg, StoreRet;
-
- // For PIC16 ABI the arguments come after the return value.
- unsigned RetVals = Ins.size();
- for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
- // Get the arguments
- Arg = OutVals[i];
-
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(Arg);
- Ops.push_back(DataAddr_Lo);
- Ops.push_back(DataAddr_Hi);
- Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8));
- Ops.push_back(InFlag);
-
- StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
- Chain = getChain(StoreRet);
- InFlag = getOutFlag(StoreRet);
- ArgOffset++;
- }
- return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
- unsigned NumOps = Outs.size();
- std::string Name;
- SDValue Arg, StoreAt;
- EVT ArgVT;
- unsigned Size=0;
-
- // If call has no arguments then do nothing and return.
- if (NumOps == 0)
- return Chain;
-
- // FIXME: This portion of code currently assumes only
- // primitive types being passed as arguments.
-
- // Legalize the address before use
- SDValue PtrLo, PtrHi;
- unsigned AddressOffset;
- int StoreOffset = 0;
- LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl);
- SDValue StoreRet;
-
- std::vector<SDValue> Ops;
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- for (unsigned i=0, Offset = 0; i<NumOps; i++) {
- // Get the argument
- Arg = OutVals[i];
- StoreOffset = (Offset + AddressOffset);
-
- // Store the argument on frame
-
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(Arg);
- Ops.push_back(PtrLo);
- Ops.push_back(PtrHi);
- Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8));
- Ops.push_back(InFlag);
-
- StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
- Chain = getChain(StoreRet);
- InFlag = getOutFlag(StoreRet);
-
- // Update the frame offset to be used for next argument
- ArgVT = Arg.getValueType();
- Size = ArgVT.getSizeInBits();
- Size = Size/8; // Calculate size in bytes
- Offset += Size; // Increase the frame offset
- }
- return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- unsigned RetVals = Ins.size();
-
- // If call does not have anything to return
- // then do nothing and go back.
- if (RetVals == 0)
- return Chain;
-
- // Call has something to return
- SDValue LoadRet;
-
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
- for(unsigned i=0;i<RetVals;i++) {
- LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, DataAddr_Lo,
- DataAddr_Hi, DAG.getConstant(i, MVT::i8),
- InFlag);
- InFlag = getOutFlag(LoadRet);
- Chain = getChain(LoadRet);
- InVals.push_back(LoadRet);
- }
- return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- // Currently handling primitive types only. They will come in
- // i8 parts
- unsigned RetVals = Ins.size();
-
- // Return immediately if the return type is void
- if (RetVals == 0)
- return Chain;
-
- // Call has something to return
-
- // Legalize the address before use
- SDValue LdLo, LdHi;
- unsigned LdOffset;
- LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl);
-
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
- SDValue LoadRet;
-
- for(unsigned i=0, Offset=0;i<RetVals;i++) {
-
- LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, LdLo, LdHi,
- DAG.getConstant(LdOffset + Offset, MVT::i8),
- InFlag);
-
- InFlag = getOutFlag(LoadRet);
-
- Chain = getChain(LoadRet);
- Offset++;
- InVals.push_back(LoadRet);
- }
-
- return Chain;
-}
-
-SDValue
-PIC16TargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- // Number of values to return
- unsigned NumRet = Outs.size();
-
-  // The function always returns its value on the stack, with the offset
-  // starting from 0.
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- std::string FuncName = F->getName();
-
- const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName));
- SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
- SDValue BS = DAG.getConstant(1, MVT::i8);
- SDValue RetVal;
- for(unsigned i=0;i<NumRet; ++i) {
- RetVal = OutVals[i];
- Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
- ES, BS,
- DAG.getConstant (i, MVT::i8));
-
- }
- return DAG.getNode(PIC16ISD::RET, dl, MVT::Other, Chain);
-}
-
-void PIC16TargetLowering::
-GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
- SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
- SelectionDAG &DAG) const {
- assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
- && "Don't know what to do of such callee!!");
- SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
- SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
- Chain = getChain(SeqStart);
- SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
-
- // Get the Lo and Hi part of code address
- SDValue Lo = Callee.getOperand(0);
- SDValue Hi = Callee.getOperand(1);
-
- SDValue Data_Lo, Data_Hi;
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
- // Subtract 2 from Address to get the Lower part of DataAddress.
- SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag);
- Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
- DAG.getConstant(2, MVT::i8));
- SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
- Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3);
- SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
- Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
- SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee,
- OperFlag);
- Chain = getChain(Call);
- OperFlag = getOutFlag(Call);
- SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
- OperFlag);
- Chain = getChain(SeqEnd);
- OperFlag = getOutFlag(SeqEnd);
-
- // Low part of Data Address
- DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag);
-
- // Make the second call.
- SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
- Chain = getChain(SeqStart);
- OperFlag = getOutFlag(SeqStart); // To manage the data dependency
-
- // Subtract 1 from Address to get high part of data address.
- Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
- DAG.getConstant(1, MVT::i8));
- SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
- Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
- PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
-
- // Use new Lo to make another CALLW
- Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
- Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
- Chain = getChain(Call);
- OperFlag = getOutFlag(Call);
- SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
- OperFlag);
- Chain = getChain(SeqEnd);
- OperFlag = getOutFlag(SeqEnd);
- // Hi part of Data Address
- DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
-}
-
-SDValue
-PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // PIC16 target does not yet support tail call optimization.
- isTailCall = false;
-
- assert(Callee.getValueType() == MVT::i16 &&
- "Don't know how to legalize this call node!!!");
-
- // The flag to track if this is a direct or indirect call.
- bool IsDirectCall = true;
- unsigned RetVals = Ins.size();
- unsigned NumArgs = Outs.size();
-
- SDValue DataAddr_Lo, DataAddr_Hi;
- if (!isa<GlobalAddressSDNode>(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee)) {
- IsDirectCall = false; // This is indirect call
-
- // If this is an indirect call then to pass the arguments
- // and read the return value back, we need the data address
- // of the function being called.
- // To get the data address two more calls need to be made.
-
- // Come here for indirect calls
- SDValue Lo, Hi;
- // Indirect addresses. Get the hi and lo parts of ptr.
- GetExpandedParts(Callee, DAG, Lo, Hi);
- // Connect Lo and Hi parts of the callee with the PIC16Connect
- Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
-
- // Read DataAddress only if we have to pass arguments or
- // read return value.
- if ((RetVals > 0) || (NumArgs > 0))
- GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
- }
-
- SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
-
- // Start the call sequence.
-  // Carry the constant 0 along the CALLSEQ_START
-  // because there is nothing else to carry.
- SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
- Chain = getChain(SeqStart);
- SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
- std::string Name;
-
-  // For any direct call the callee will be a GlobalAddressSDNode or
-  // an ExternalSymbolSDNode.
- SDValue ArgLabel, RetLabel;
- if (IsDirectCall) {
- // Considering the GlobalAddressNode case here.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8);
- Name = G->getGlobal()->getName();
- } else {// Considering the ExternalSymbol case here
- ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
- Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
- Name = ES->getSymbol();
- }
-
- // Label for argument passing
- const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name));
- ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
-
- // Label for reading return value
- const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name));
- RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
- } else {
- // if indirect call
- SDValue CodeAddr_Lo = Callee.getOperand(0);
- SDValue CodeAddr_Hi = Callee.getOperand(1);
-
- /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
- DAG.getConstant(2, MVT::i8));*/
-
- // move Hi part in PCLATH
- CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
- Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
- CodeAddr_Hi);
- }
-
- // Pass the argument to function before making the call.
- SDValue CallArgs;
- if (IsDirectCall) {
- CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
- Outs, OutVals, dl, DAG);
- Chain = getChain(CallArgs);
- OperFlag = getOutFlag(CallArgs);
- } else {
- CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, Outs, OutVals, Ins,
- dl, DAG);
- Chain = getChain(CallArgs);
- OperFlag = getOutFlag(CallArgs);
- }
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
- OperFlag);
- Chain = getChain(PICCall);
- OperFlag = getOutFlag(PICCall);
-
-
-  // Carry the constant 0 along the CALLSEQ_END
-  // because there is nothing else to carry.
- SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
- OperFlag);
- Chain = getChain(SeqEnd);
- OperFlag = getOutFlag(SeqEnd);
-
- // Lower the return value reading after the call.
- if (IsDirectCall)
- return LowerDirectCallReturn(RetLabel, Chain, OperFlag,
- Ins, dl, DAG, InVals);
- else
- return LowerIndirectCallReturn(Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, Ins, dl, DAG, InVals);
-}
-
-bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const {
- if (Op.getOpcode() == PIC16ISD::PIC16Load)
- if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
- || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
- return true;
- return false;
-}
-
-// NeedToConvertToMemOp - Returns true if one of the operands of the
-// operation 'Op' needs to be put into memory. Also returns the
-// operand number of the operand to be converted in 'MemOp'. Remember, PIC16
-// has no instruction that can operate on two registers. Most insns take
-// one register and one memory operand (addwf) / constant (addlw).
-bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
- SelectionDAG &DAG) const {
-  // If one of the operands is a constant, return false.
- if (Op.getOperand(0).getOpcode() == ISD::Constant ||
- Op.getOperand(1).getOpcode() == ISD::Constant)
- return false;
-
- // Return false if one of the operands is already a direct
- // load and that operand has only one use.
- if (isDirectLoad(Op.getOperand(0))) {
- if (Op.getOperand(0).hasOneUse()) {
-      // The legal-and-profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned in topological order. Therefore, first
-      // assign the topological order, then perform the check.
-      // Note: though this ordering is done before beginning legalization,
-      // nodes newly added during legalization have NodeId=-1 (NewNode),
-      // so proper ordering of the nodes is required before performing any
-      // check.
- DAG.AssignTopologicalOrder();
-
-      // Direct load operands are folded into binary operations. But before
-      // folding, verify that the folding is legal. Fold only if it is legal;
-      // otherwise convert this direct load to a separate memory operation.
- if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0),
- Op.getNode(), Op.getNode(),
- CodeGenOpt::Default))
- return false;
- else
- MemOp = 0;
- }
- }
-
-  // For operations that are non-commutative there is no need to check
-  // the right operand, because folding the right operand may result in
-  // an incorrect operation.
- if (! SelectionDAG::isCommutativeBinOp(Op.getOpcode()))
- return true;
-
- if (isDirectLoad(Op.getOperand(1))) {
- if (Op.getOperand(1).hasOneUse()) {
-      // The legal-and-profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned in topological order. Therefore, first
-      // assign the topological order, then perform the check.
-      // Note: though this ordering is done before beginning legalization,
-      // nodes newly added during legalization have NodeId=-1 (NewNode),
-      // so proper ordering of the nodes is required before performing any
-      // check.
- DAG.AssignTopologicalOrder();
-
-      // Direct load operands are folded into binary operations. But before
-      // folding, verify that the folding is legal. Fold only if it is legal;
-      // otherwise convert this direct load to a separate memory operation.
- if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1),
- Op.getNode(), Op.getNode(),
- CodeGenOpt::Default))
- return false;
- else
- MemOp = 1;
- }
- }
- return true;
-}
-
-// LowerBinOp - Lower a commutative binary operation that does not
-// affect the carry status flag.
-SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
-
- // We should have handled larger operands in type legalizer itself.
- assert (Op.getValueType() == MVT::i8 && "illegal Op to lower");
-
- unsigned MemOp = 1;
- if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
- // Put one value on stack.
- SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-
- return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
- NewVal);
- }
- else {
- return Op;
- }
-}
-
-// LowerADD - Lower all types of ADD operations including the ones
-// that affect carry.
-SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
- // We should have handled larger operands in type legalizer itself.
- assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
- DebugLoc dl = Op.getDebugLoc();
- unsigned MemOp = 1;
- if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
- // Put one value on stack.
- SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-
- // ADDC and ADDE produce two results.
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
- // ADDE has three operands, the last one is the carry bit.
- if (Op.getOpcode() == ISD::ADDE)
- return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
- NewVal, Op.getOperand(2));
- // ADDC has two operands.
- else if (Op.getOpcode() == ISD::ADDC)
- return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
- NewVal);
- // ADD it is. It produces only one result.
- else
- return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
- NewVal);
- }
- else
- return Op;
-}
-
-SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- // We should have handled larger operands in type legalizer itself.
- assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
- unsigned MemOp = 1;
- SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
-  // Since we don't have an instruction for X - c,
-  // we can change it to X + (-c).
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (C && (Op.getOpcode() == ISD::SUB))
- {
- return DAG.getNode(ISD::ADD,
- dl, MVT::i8, Op.getOperand(0),
- DAG.getConstant(0-(C->getZExtValue()), MVT::i8));
- }
-
- if (NeedToConvertToMemOp(Op, MemOp, DAG) ||
- (isDirectLoad(Op.getOperand(1)) &&
- (!isDirectLoad(Op.getOperand(0))) &&
- (Op.getOperand(0).getOpcode() != ISD::Constant)))
- {
- // Put first operand on stack.
- SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
-
- switch (Op.getOpcode()) {
- default:
- assert (0 && "Opcode unknown.");
- case ISD::SUBE:
- return DAG.getNode(Op.getOpcode(),
- dl, Tys, NewVal, Op.getOperand(1),
- Op.getOperand(2));
- break;
- case ISD::SUBC:
- return DAG.getNode(Op.getOpcode(),
- dl, Tys, NewVal, Op.getOperand(1));
- break;
- case ISD::SUB:
- return DAG.getNode(Op.getOpcode(),
- dl, MVT::i8, NewVal, Op.getOperand(1));
- break;
- }
- }
- else
- return Op;
-}
-
-void PIC16TargetLowering::InitReservedFrameCount(const Function *F,
- SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-
- unsigned NumArgs = F->arg_size();
-
- bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
-
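-  // Non-void functions reserve one extra frame slot for the return value.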
- if (isVoidFunc)
- FuncInfo->setReservedFrameCount(NumArgs);
- else
- FuncInfo->setReservedFrameCount(NumArgs + 1);
-}
-
-// LowerFormalArguments - Argument values are loaded from
-// <fname>.args + offset. All arguments have already been broken into
-// legalized types, so the offset just runs from 0 to NumArgVals - 1.
-
-SDValue
-PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
- unsigned NumArgVals = Ins.size();
-
- // Get the callee's name to create the <fname>.args label to pass args.
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- std::string FuncName = F->getName();
-
- // Reset the map of FI and TmpOffset
- ResetTmpOffsetMap(DAG);
- // Initialize the ReserveFrameCount
- InitReservedFrameCount(F, DAG);
-
- // Create the <fname>.args external symbol.
- const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName));
- SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
- // Load arg values from the label + offset.
- SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other);
- SDValue BS = DAG.getConstant(1, MVT::i8);
- for (unsigned i = 0; i < NumArgVals ; ++i) {
- SDValue Offset = DAG.getConstant(i, MVT::i8);
- SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
- Offset);
- Chain = getChain(PICLoad);
- InVals.push_back(PICLoad);
- }
-
- return Chain;
-}
-
-// Perform DAGCombine of PIC16Load.
-// FIXME - Need a more elaborate comment here.
-SDValue PIC16TargetLowering::
-PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Chain = N->getOperand(0);
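-  // If the loaded value has no users, route the load's chain result to the
-  // incoming chain so the dead load can be dropped.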
- if (N->hasNUsesOfValue(0, 0)) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain);
- }
- return SDValue();
-}
-
-// For all functions with arguments, some STORE nodes are generated that
-// store the argument on the FrameIndex. However, in PIC16 the arguments
-// are passed on the stack only. Therefore these STORE nodes are redundant,
-// and they will be removed in PerformStoreCombine.
-//
-// Currently this function does nothing and will be updated to remove the
-// unwanted store operations.
-SDValue PIC16TargetLowering::
-PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const {
- return SDValue(N, 0);
- /*
- // Storing an undef value is of no use, so remove it
- if (isStoringUndef(N, Chain, DAG)) {
- return Chain; // remove the store and return the chain
- }
- //else everything is ok.
- return SDValue(N, 0);
- */
-}
-
-SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- switch (N->getOpcode()) {
- case ISD::STORE:
- return PerformStoreCombine(N, DCI);
- case PIC16ISD::PIC16Load:
- return PerformPIC16LoadCombine(N, DCI);
- }
- return SDValue();
-}
-
-static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code!");
- case ISD::SETNE: return PIC16CC::NE;
- case ISD::SETEQ: return PIC16CC::EQ;
- case ISD::SETGT: return PIC16CC::GT;
- case ISD::SETGE: return PIC16CC::GE;
- case ISD::SETLT: return PIC16CC::LT;
- case ISD::SETLE: return PIC16CC::LE;
- case ISD::SETULT: return PIC16CC::ULT;
- case ISD::SETULE: return PIC16CC::ULE;
- case ISD::SETUGE: return PIC16CC::UGE;
- case ISD::SETUGT: return PIC16CC::UGT;
- }
-}
-
-// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
-// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
-static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
- ISD::CondCode CC, unsigned &SPCC) {
- if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isNullValue() &&
- CC == ISD::SETNE &&
- (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
- LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
- isa<ConstantSDNode>(LHS.getOperand(0)) &&
- isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
- cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
- SDValue CMPCC = LHS.getOperand(3);
- SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
- LHS = CMPCC.getOperand(0);
- RHS = CMPCC.getOperand(1);
- }
-}
-
-// Returns appropriate CMP insn and corresponding condition code in PIC16CC
-SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
- unsigned CC, SDValue &PIC16CC,
- SelectionDAG &DAG, DebugLoc dl) const {
- PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
-
-  // PIC16 sub is literal - W, so swap the operands and condition if needed.
-  // i.e. a < 12 can be rewritten as 12 > a.
- if (RHS.getOpcode() == ISD::Constant) {
-
- SDValue Tmp = LHS;
- LHS = RHS;
- RHS = Tmp;
-
- switch (CondCode) {
- default: break;
- case PIC16CC::LT:
- CondCode = PIC16CC::GT;
- break;
- case PIC16CC::GT:
- CondCode = PIC16CC::LT;
- break;
- case PIC16CC::ULT:
- CondCode = PIC16CC::UGT;
- break;
- case PIC16CC::UGT:
- CondCode = PIC16CC::ULT;
- break;
- case PIC16CC::GE:
- CondCode = PIC16CC::LE;
- break;
- case PIC16CC::LE:
- CondCode = PIC16CC::GE;
- break;
- case PIC16CC::ULE:
- CondCode = PIC16CC::UGE;
- break;
- case PIC16CC::UGE:
- CondCode = PIC16CC::ULE;
- break;
- }
- }
-
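-  // Hand the (possibly swapped) condition code back to the caller as an
-  // i8 constant.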
- PIC16CC = DAG.getConstant(CondCode, MVT::i8);
-
-  // For signed comparisons, flip the sign bit of both operands so that an
-  // unsigned compare gives the right result.
- SDValue Mask = DAG.getConstant(128, MVT::i8);
- if (isSignedComparison(CondCode)) {
- LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
- RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask);
- }
-
- SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
-  // We can use a subtract operation to set the condition codes, but
-  // we may need to put one operand in memory first.
-  // Nothing to do if the first operand is already a valid type (a direct load
-  // for subwf or a literal for sublw) and it is used by this operation only.
- if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS))
- && LHS.hasOneUse())
- return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-
- // else convert the first operand to mem.
- LHS = ConvertToMemOperand (LHS, DAG, dl);
- return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-}
-
-
-SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue TrueVal = Op.getOperand(2);
- SDValue FalseVal = Op.getOperand(3);
- unsigned ORIGCC = ~0;
- DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a select_cc of a "setcc", and if the setcc got lowered into
-  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
- // i.e.
- // A setcc: lhs, rhs, cc is expanded by llvm to
- // select_cc: result of setcc, 0, 1, 0, setne
- // We can think of it as:
- // select_cc: lhs, rhs, 1, 0, cc
- LookThroughSetCC(LHS, RHS, CC, ORIGCC);
- if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
- SDValue PIC16CC;
- SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
- return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
- FalseVal, PIC16CC, Cmp.getValue(1));
-}
-
-MachineBasicBlock *
-PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
- unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
- DebugLoc dl = MI->getDebugLoc();
-
- // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
- // control-flow pattern. The incoming instruction knows the destination vreg
- // to set, the condition code register to branch on, the true/false values to
- // select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // [f]bCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = sinkMBB;
- BuildMI(*BB, BB->begin(), dl,
- TII.get(PIC16::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2); // LHS of the condition.
- SDValue RHS = Op.getOperand(3); // RHS of the condition.
- SDValue Dest = Op.getOperand(4); // BB to jump to
- unsigned ORIGCC = ~0;
- DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a br_cc of a "setcc", and if the setcc got lowered into
-  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
- LookThroughSetCC(LHS, RHS, CC, ORIGCC);
- if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
- // Get the Compare insn and condition code.
- SDValue PIC16CC;
- SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
- return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC,
- Cmp.getValue(1));
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.h b/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.h
deleted file mode 100644
index d942af4..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16ISelLowering.h
+++ /dev/null
@@ -1,253 +0,0 @@
-//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PIC16 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16ISELLOWERING_H
-#define PIC16ISELLOWERING_H
-
-#include "PIC16.h"
-#include "PIC16Subtarget.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-#include <map>
-
-namespace llvm {
- namespace PIC16ISD {
- enum NodeType {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- Lo, // Low 8-bits of GlobalAddress.
- Hi, // High 8-bits of GlobalAddress.
- PIC16Load,
-      PIC16LdArg,   // This is a replica of PIC16Load, but it is used to load
-                    // function arguments and facilitates some store removal
-                    // optimizations.
-
- PIC16LdWF,
- PIC16Store,
- PIC16StWF,
- Banksel,
- MTLO, // Move to low part of FSR
- MTHI, // Move to high part of FSR
-      MTPCLATH,     // Move to PCLATH
- PIC16Connect, // General connector for PIC16 nodes
- BCF,
- LSLF, // PIC16 Logical shift left
- LRLF, // PIC16 Logical shift right
- RLF, // Rotate left through carry
- RRF, // Rotate right through carry
- CALL, // PIC16 Call instruction
- CALLW, // PIC16 CALLW instruction
- SUBCC, // Compare for equality or inequality.
- SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
- BRCOND, // Conditional branch.
- RET, // Return.
- Dummy
- };
-
- // Keep track of different address spaces.
- enum AddressSpace {
- RAM_SPACE = 0, // RAM address space
- ROM_SPACE = 1 // ROM address space number is 1
- };
- enum PIC16Libcall {
- MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
- SRA_I8,
- SLL_I8,
- SRL_I8,
- PIC16UnknownCall
- };
- }
-
-
- //===--------------------------------------------------------------------===//
- // TargetLowering Implementation
- //===--------------------------------------------------------------------===//
- class PIC16TargetLowering : public TargetLowering {
- public:
- explicit PIC16TargetLowering(PIC16TargetMachine &TM);
-
- /// getTargetNodeName - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
- /// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const;
- virtual MVT::SimpleValueType getCmpLibcallReturnType() const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const;
- // Call returns
- SDValue
- LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- SDValue
- LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- // Call arguments
- SDValue
- LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- SDValue
- LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
- SDValue DataAddr_Lo, SDValue DataAddr_Hi,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
- SelectionDAG &DAG, DebugLoc dl) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
- virtual void LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
- SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const;
- SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const;
- SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const;
- SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const;
-
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
-    // This function returns the Tmp Offset for the FrameIndex. If a TmpOffset
-    // already exists for the FI then it returns that; otherwise it creates a
-    // new offset and returns it.
- unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size,
- MachineFunction &MF) const;
- void ResetTmpOffsetMap(SelectionDAG &DAG) const;
- void InitReservedFrameCount(const Function *F,
- SelectionDAG &DAG) const;
-
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- virtual unsigned getFunctionAlignment(const Function *) const {
- // FIXME: The function never seems to be aligned.
- return 1;
- }
- protected:
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(EVT VT) const;
- private:
- // If the Node is a BUILD_PAIR representing a direct Address,
- // then this function will return true.
- bool isDirectAddress(const SDValue &Op) const;
-
- // If the Node is a DirectAddress in ROM_SPACE then this
- // function will return true
- bool isRomAddress(const SDValue &Op) const;
-
- // Extract the Lo and Hi component of Op.
- void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo,
- SDValue &Hi) const;
-
-
-    // A load pointer can be a direct or an indirect address. In PIC16 direct
-    // addresses need a Banksel and indirect addresses need to be loaded into
-    // FSR first. Handle the address-specific cases here.
- void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain,
- SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const;
-
- // FrameIndex should be broken down into ExternalSymbol and FrameOffset.
- void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
- int &Offset) const;
-
-    // For indirect calls the data address of the callee frame needs to be
-    // extracted. This function fills the arguments DataAddr_Lo and
-    // DataAddr_Hi with the address of the callee frame.
- void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
- SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
- SelectionDAG &DAG) const;
-
-    // We cannot have both operands of a binary operation in W.
-    // This function puts one operand on the stack and generates a load.
- SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG,
- DebugLoc dl) const;
-
- // This function checks if we need to put an operand of an operation on
- // stack and generate a load or not.
- // DAG parameter is required to access DAG information during
- // analysis.
- bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
- SelectionDAG &DAG) const;
-
- /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const PIC16Subtarget *Subtarget;
-
-
- // Extending the LIB Call framework of LLVM
- // to hold the names of PIC16Libcalls.
- const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall];
-
- // To set and retrieve the lib call names.
- void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
- const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const;
-
- // Make PIC16 Libcall.
- SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT,
- const SDValue *Ops, unsigned NumOps, bool isSigned,
- SelectionDAG &DAG, DebugLoc dl) const;
-
- // Check if operation has a direct load operand.
- inline bool isDirectLoad(const SDValue Op) const;
- };
-} // namespace llvm
-
-#endif // PIC16ISELLOWERING_H
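
For reference, the GetTmpOffsetForFI contract described in the class above (reuse the existing temporary offset for a frame index, otherwise allocate a new one of the requested slot size) boils down to a memoized bump allocator over a map. A minimal sketch of that behaviour, using plain STL containers and hypothetical names in place of the real MachineFunction / PIC16MachineFunctionInfo plumbing:

    #include <map>

    // Sketch only: mirrors the documented GetTmpOffsetForFI behaviour with
    // plain data instead of MachineFunction / PIC16MachineFunctionInfo state.
    struct TmpOffsetAllocator {
      std::map<unsigned, unsigned> FiTmpOffsetMap; // FrameIndex -> zero-based offset
      unsigned TmpSize = 0;                        // bytes of temp area used so far

      unsigned getTmpOffsetForFI(unsigned FI, unsigned SlotSize) {
        // Reuse the offset if this frame index has been seen before.
        auto It = FiTmpOffsetMap.find(FI);
        if (It != FiTmpOffsetMap.end())
          return It->second;

        // Otherwise allocate SlotSize bytes at the end of the temp area.
        unsigned Offset = TmpSize;
        FiTmpOffsetMap[FI] = Offset;
        TmpSize += SlotSize;
        return Offset;
      }
    };

In the deleted target this state lived in PIC16MachineFunctionInfo (FiTmpOffsetMap and TmpSize), with ResetTmpOffsetMap declared above to clear it.
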
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16InstrFormats.td b/contrib/llvm/lib/Target/PIC16/PIC16InstrFormats.td
deleted file mode 100644
index e213ea8..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16InstrFormats.td
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- PIC16InstrFormats.td - PIC16 Instruction Formats-------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe PIC16 instructions format
-//
-// All the possible PIC16 fields are:
-//
-// opcode - operation code.
-// f - 7-bit register file address.
-// d - 1-bit direction specifier
-// k - 8/11 bit literals
-// b - 3 bits bit num specifier
-//
-//===----------------------------------------------------------------------===//
-
-// Generic PIC16 Format
-// PIC16 Instructions are 14-bit wide.
-
-// FIXME: Add Cooper Specific Formats if any.
-
-class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Instruction {
- field bits<14> Inst;
-
- let Namespace = "PIC16";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let AsmString = asmstr;
- let Pattern = pattern;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Byte Oriented instruction class in PIC16 : <|opcode|d|f|>
-// opcode = 6 bits.
-// d = direction = 1 bit.
-// f = file register address = 7 bits.
-//===----------------------------------------------------------------------===//
-
-class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- :PIC16Inst<outs, ins, asmstr, pattern> {
- bits<1> d;
- bits<7> f;
-
- let Inst{13-8} = opcode;
-
- let Inst{7} = d;
- let Inst{6-0} = f;
-}
-
-//===----------------------------------------------------------------------===//
-// Bit Oriented instruction class in PIC16 : <|opcode|b|f|>
-// opcode = 4 bits.
-// b = bit specifier = 3 bits.
-// f = file register address = 7 bits.
-//===----------------------------------------------------------------------===//
-
-class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : PIC16Inst<outs, ins, asmstr, pattern> {
- bits<3> b;
- bits<7> f;
-
- let Inst{13-10} = opcode;
-
- let Inst{9-7} = b;
- let Inst{6-0} = f;
-}
-
-//===----------------------------------------------------------------------===//
-// Literal Format instruction class in PIC16 : <|opcode|k|>
-// opcode = 6 bits
-// k = literal = 8 bits
-//===----------------------------------------------------------------------===//
-
-class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : PIC16Inst<outs, ins, asmstr, pattern> {
- bits<8> k;
-
- let Inst{13-8} = opcode;
-
- let Inst{7-0} = k;
-}
-
-//===----------------------------------------------------------------------===//
-// Control Format instruction class in PIC16 : <|opcode|k|>
-// opcode = 3 bits.
-// k = jump address = 11 bits.
-//===----------------------------------------------------------------------===//
-
-class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : PIC16Inst<outs, ins, asmstr, pattern> {
- bits<11> k;
-
- let Inst{13-11} = opcode;
-
- let Inst{10-0} = k;
-}
-
-//===----------------------------------------------------------------------===//
-// Pseudo instruction class in PIC16
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : PIC16Inst<outs, ins, asmstr, pattern> {
- let Inst{13-6} = 0;
-}
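
As a quick cross-check of the field layouts above (for the byte-oriented class: a 6-bit opcode in bits 13-8, the direction bit in bit 7 and the 7-bit file register address in bits 6-0 of the 14-bit word), the encoding can be reproduced with a few lines of bit arithmetic. A hypothetical helper, not part of the deleted target:

    #include <cassert>
    #include <cstdint>

    // Sketch: pack a PIC16 byte-oriented instruction <|opcode|d|f|> into its
    // 14-bit encoding, mirroring the ByteFormat field layout above.
    static uint16_t encodeByteFormat(unsigned Opcode, unsigned D, unsigned F) {
      assert(Opcode < (1u << 6) && D < (1u << 1) && F < (1u << 7));
      return static_cast<uint16_t>((Opcode << 8) | (D << 7) | F);
    }

The bit, literal and control formats follow the same pattern with their own field widths (4/3/7, 6/8 and 3/11 bits respectively).
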
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.cpp b/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.cpp
deleted file mode 100644
index 81257f3..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16InstrInfo.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16GenInstrInfo.inc"
-#include "llvm/Function.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstdio>
-
-
-using namespace llvm;
-
-// FIXME: Add subtarget support to this constructor.
-PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm)
- : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)),
- TM(tm),
- RegInfo(*this, *TM.getSubtargetImpl()) {}
-
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the stack slot.
-/// If not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- if (MI->getOpcode() == PIC16::movwf
- && MI->getOperand(0).isReg()
- && MI->getOperand(1).isSymbol()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- return 0;
-}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the dest reg along with the FrameIndex of the stack slot.
-/// If not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- if (MI->getOpcode() == PIC16::movf
- && MI->getOperand(0).isReg()
- && MI->getOperand(1).isSymbol()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- return 0;
-}
-
-
-void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- const PIC16TargetLowering *PTLI = TM.getTargetLowering();
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- const Function *Func = MBB.getParent()->getFunction();
- const std::string FuncName = Func->getName();
-
- const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-
- // On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
- if (RC == PIC16::GPRRegisterClass) {
- //MachineFunction &MF = *MBB.getParent();
- //MachineRegisterInfo &RI = MF.getRegInfo();
- BuildMI(MBB, I, DL, get(PIC16::movwf))
- .addReg(SrcReg, getKillRegState(isKill))
- .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
- .addExternalSymbol(tmpName)
- .addImm(1); // Emit banksel for it.
- }
- else if (RC == PIC16::FSR16RegisterClass) {
- // This is a 16-bit register and the frame index given by llvm is of
- // size two here. Break this index N into two zero-based indexes and
- // put one into the map. The second one is always obtained by adding 1
- // to the first zero-based index. In fact it is going to use 3 slots,
- // as saving FSRs corrupts W too and hence we need to save/restore W as well.
-
- unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0
- : PIC16::save_fsr1;
- BuildMI(MBB, I, DL, get(opcode))
- .addReg(SrcReg, getKillRegState(isKill))
- .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
- .addExternalSymbol(tmpName)
- .addImm(1); // Emit banksel for it.
- }
- else
- llvm_unreachable("Can't store this register to stack slot");
-}
-
-void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- const PIC16TargetLowering *PTLI = TM.getTargetLowering();
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- const Function *Func = MBB.getParent()->getFunction();
- const std::string FuncName = Func->getName();
-
- const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-
- // On the order of operands here: think "movf FrameIndex, W".
- if (RC == PIC16::GPRRegisterClass) {
- //MachineFunction &MF = *MBB.getParent();
- //MachineRegisterInfo &RI = MF.getRegInfo();
- BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
- .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent()))
- .addExternalSymbol(tmpName)
- .addImm(1); // Emit banksel for it.
- }
- else if (RC == PIC16::FSR16RegisterClass) {
- // This is a 16-bit register and the frame index given by llvm is of
- // size two here. Break this index N into two zero-based indexes and
- // put one into the map. The second one is always obtained by adding 1
- // to the first zero-based index. In fact it is going to use 3 slots,
- // as saving FSRs corrupts W too and hence we need to save/restore W as well.
-
- unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0
- : PIC16::restore_fsr1;
- BuildMI(MBB, I, DL, get(opcode), DestReg)
- .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
- .addExternalSymbol(tmpName)
- .addImm(1); // Emit banksel for it.
- }
- else
- llvm_unreachable("Can't load this register from stack slot");
-}
-
-void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc;
- if (PIC16::FSR16RegClass.contains(DestReg, SrcReg))
- Opc = PIC16::copy_fsr;
- else if (PIC16::GPRRegClass.contains(DestReg, SrcReg))
- Opc = PIC16::copy_w;
- else
- llvm_unreachable("Impossible reg-to-reg copy");
-
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-/// InsertBranch - Insert a branch into the end of the specified
-/// MachineBasicBlock. The operands to this method are the same as those
-/// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
-/// returns success and when an unconditional branch (TBB is non-null, FBB is
-/// null, Cond is empty) needs to be inserted. It returns the number of
-/// instructions inserted.
-unsigned PIC16InstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- if (FBB == 0) { // One way branch.
- if (Cond.empty()) {
- // Unconditional branch?
- BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB);
- }
- return 1;
- }
-
- // FIXME: If there are conditions specified then a conditional branch
- // should be generated.
- // For the time being no instruction is being generated, therefore
- // we return 0.
- return 0;
-}
-
-bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return true;
-
- // Get the terminator instruction.
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return true;
- --I;
- }
- // Handle unconditional branches. If the unconditional branch's target is
- // successor basic block then remove the unconditional branch.
- if (I->getOpcode() == PIC16::br_uncond && AllowModify) {
- if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
- I->eraseFromParent();
- }
- }
- return true;
-}
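
The AnalyzeBranch implementation above applies one narrow peephole: a trailing unconditional goto whose target is the next block in layout order is redundant, so it gets erased when AllowModify permits. A standalone sketch of that rule over a toy block list, with all names hypothetical:

    #include <string>
    #include <vector>

    // Sketch: drop a trailing unconditional branch when it targets the block
    // that follows in layout order, mirroring the AnalyzeBranch cleanup above.
    struct Block {
      std::string Name;
      std::string GotoTarget; // empty if the block ends without a goto
    };

    static void removeRedundantGotos(std::vector<Block> &Layout) {
      for (size_t i = 0; i + 1 < Layout.size(); ++i)
        if (Layout[i].GotoTarget == Layout[i + 1].Name)
          Layout[i].GotoTarget.clear(); // fall through instead of branching
    }
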
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.h
deleted file mode 100644
index 661b335..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.h
+++ /dev/null
@@ -1,76 +0,0 @@
-//===- PIC16InstrInfo.h - PIC16 Instruction Information----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16INSTRUCTIONINFO_H
-#define PIC16INSTRUCTIONINFO_H
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
-
-
-class PIC16InstrInfo : public TargetInstrInfoImpl
-{
- PIC16TargetMachine &TM;
- const PIC16RegisterInfo RegInfo;
-public:
- explicit PIC16InstrInfo(PIC16TargetMachine &TM);
-
- virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; }
-
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
- /// load from a stack slot, return the virtual or physical register number of
- /// the destination along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- /// isStoreToStackSlot - If the specified machine instruction is a direct
- /// store to a stack slot, return the virtual or physical register number of
- /// the source reg along with the FrameIndex of the loaded stack slot. If
- /// not, return 0. This predicate must return 0 if the instruction has
- /// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- };
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.td b/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.td
deleted file mode 100644
index 86d36cb..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16InstrInfo.td
+++ /dev/null
@@ -1,540 +0,0 @@
-//===- PIC16InstrInfo.td - PIC16 Instruction defs -------------*- tblgen-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the PIC16 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Type Constraints.
-//===----------------------------------------------------------------------===//
-class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
-class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Type Profiles.
-//===----------------------------------------------------------------------===//
-
-// Generic type profiles for i8/i16 unary/binary operations.
-// Taking one i8 or i16 and producing void.
-def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
-def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
-
-// Taking one value and producing an output of same type.
-def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
-def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
-
-// Taking two values and producing an output of same type.
-def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
-def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>,
- SDTCisI16<2>]>;
-
-// Node specific type profiles.
-def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>,
- SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>,
- SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
- SDTCisI8<2>]>;
-
-// PIC16ISD::CALL type profile
-def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
-
-// PIC16ISD::BRCOND
-def SDT_PIC16Brcond: SDTypeProfile<0, 2,
- [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
-
-// PIC16ISD::SELECT_ICC
-def SDT_PIC16Selecticc: SDTypeProfile<1, 3,
- [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
- SDTCisI8<3>]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 addressing modes matching via DAG.
-//===----------------------------------------------------------------------===//
-def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Node Definitions.
-//===----------------------------------------------------------------------===//
-def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
- [SDNPHasChain, SDNPOutFlag]>;
-def PIC16callseq_end : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-
-// Low 8-bits of GlobalAddress.
-def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;
-
-// High 8-bits of GlobalAddress.
-def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
-
-// The MTHI and MTLO nodes are used only to match them in the incoming
-// DAG for replacement by the corresponding set_fsrhi, set_fsrlo instructions.
-// These nodes are not used for defining any instructions.
-def MTLO : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
-def MTHI : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
-def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
-
-// Node to generate Bank Select for a GlobalAddress.
-def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
-
-// Node to match a direct store operation.
-def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
-def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
-// Node to match a direct load operation.
-def PIC16Load : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>;
-def PIC16LdArg : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>;
-def PIC16LdWF : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>;
-
-// Node to match PIC16 call
-def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call,
- [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
-def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw,
- [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
-
-// Node to match a comparison instruction.
-def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>;
-
-// Node to match a conditional branch.
-def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond,
- [SDNPHasChain, SDNPInFlag]>;
-
-def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc,
- [SDNPInFlag]>;
-
-def PIC16ret : SDNode<"PIC16ISD::RET", SDTNone, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Operand Definitions.
-//===----------------------------------------------------------------------===//
-def i8mem : Operand<i8>;
-def brtarget: Operand<OtherVT>;
-
-// Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
- def CCOp : Operand<i8>;
-
-include "PIC16InstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// PIC16 Common Classes.
-//===----------------------------------------------------------------------===//
-
-// W = W Op F : Load the value from F and do Op to W.
-let Constraints = "$src = $dst", mayLoad = 1 in
-class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
- ByteFormat<OpCode, (outs GPR:$dst),
- (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo + $offset, W"),
- [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
- (i8 imm:$ptrhi),
- (i8 imm:$offset))))]>;
-
-// F = F Op W : Load the value from F, do op with W and store in F.
-// This insn class is not marked as TwoAddress because the reg is
-// being used as a source operand only. (Remember a TwoAddress insn
-// needs a copy.)
-let mayStore = 1 in
-class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
- ByteFormat<OpCode, (outs),
- (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo + $offset, F"),
- [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
- (i8 imm:$ptrhi),
- (i8 imm:$offset))),
- diraddr:$ptrlo,
- (i8 imm:$ptrhi), (i8 imm:$offset)
- )]>;
-
-// W = W Op L : Do Op of L with W and place result in W.
-let Constraints = "$src = $dst" in
-class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
- LiteralFormat<opcode, (outs GPR:$dst),
- (ins GPR:$src, i8imm:$literal),
- !strconcat(OpcStr, " $literal"),
- [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Instructions.
-//===----------------------------------------------------------------------===//
-
-// Pseudo-instructions.
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt),
- "!ADJCALLSTACKDOWN $amt",
- [(PIC16callseq_start imm:$amt)]>;
-
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt),
- "!ADJCALLSTACKUP $amt",
- [(PIC16callseq_end imm:$amt)]>;
-
-//-----------------------------------
-// Various movlw insn patterns.
-//-----------------------------------
-let isReMaterializable = 1 in {
-// Move 8-bit literal to W.
-def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src),
- "movlw $src",
- [(set GPR:$dst, (i8 imm:$src))]>;
-
-// Move a Lo(TGA) to W.
-def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
- "movlw LOW(${src} + ${src2})",
- [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>;
-
-// Move a Lo(TES) to W.
-def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
- "movlw LOW(${src} + ${src2})",
- [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>;
-
-// Move a Hi(TGA) to W.
-def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
- "movlw HIGH(${src} + ${src2})",
- [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>;
-
-// Move a Hi(TES) to W.
-def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
- "movlw HIGH(${src} + ${src2})",
- [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>;
-}
-
-//-------------------
-// FSR setting insns.
-//-------------------
-// These insns are matched via a DAG replacement pattern.
-def set_fsrlo:
- ByteFormat<0, (outs FSR16:$fsr),
- (ins GPR:$val),
- "movwf ${fsr}L",
- []>;
-
-let Constraints = "$src = $dst" in
-def set_fsrhi:
- ByteFormat<0, (outs FSR16:$dst),
- (ins FSR16:$src, GPR:$val),
- "movwf ${dst}H",
- []>;
-
-def set_pclath:
- ByteFormat<0, (outs PCLATHR:$dst),
- (ins GPR:$val),
- "movwf ${dst}",
- [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
-
-//----------------------------
-// copyPhysReg
-// copyPhysReg insns. These are dummy. They should always be deleted
-// by the optimizer and never be present in the final generated code.
-// if they are, then we have to write correct macros for these insns.
-//----------------------------
-def copy_fsr:
- Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>;
-
-def copy_w:
- Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>;
-
-class SAVE_FSR<string OpcStr>:
- Pseudo<(outs),
- (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo, $offset"),
- []>;
-
-def save_fsr0: SAVE_FSR<"save_fsr0">;
-def save_fsr1: SAVE_FSR<"save_fsr1">;
-
-class RESTORE_FSR<string OpcStr>:
- Pseudo<(outs FSR16:$dst),
- (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo, $offset"),
- []>;
-
-def restore_fsr0: RESTORE_FSR<"restore_fsr0">;
-def restore_fsr1: RESTORE_FSR<"restore_fsr1">;
-
-//--------------------------
-// Store to memory
-//-------------------------
-
-// Direct store.
-// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel.
-let mayStore = 1 in
-class MOVWF_INSN<bits<6> OpCode, SDNode OpNodeDest, SDNode Op>:
- ByteFormat<0, (outs),
- (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- "movwf ${ptrlo} + ${offset}",
- [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi),
- (i8 imm:$offset))]>;
-
-// Store W to a Global Address.
-def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>;
-
-// Store W to an External Symbol.
-def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>;
-
-// Store with InFlag and OutFlag
-// This is the same as movwf_1 but has a flag. A flag is required to
-// order the stores while passing the params to a function.
-def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>;
-
-// Indirect store. Matched via a DAG replacement pattern.
-def store_indirect :
- ByteFormat<0, (outs),
- (ins GPR:$val, FSR16:$fsr, i8imm:$offset),
- "movwi $offset[$fsr]",
- []>;
-
-//----------------------------
-// Load from memory
-//----------------------------
-// Direct load.
-// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
-// Output: dst = W
-let Defs = [STATUS], mayLoad = 1 in
-class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>:
- ByteFormat<0, (outs GPR:$dst),
- (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- "movf ${ptrlo} + ${offset}, W",
- [(set GPR:$dst,
- (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi),
- (i8 imm:$offset)))]>;
-
-// Load from a GA.
-def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>;
-
-// Load from an ES.
-def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>;
-def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>;
-
-// Load with InFlag and OutFlag
-// This is the same as movf_1 but has a flag. A flag is required to
-// order the loads while copying the return value of a function.
-def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>;
-
-// Indirect load. Matched via a DAG replacement pattern.
-def load_indirect :
- ByteFormat<0, (outs GPR:$dst),
- (ins FSR16:$fsr, i8imm:$offset),
- "moviw $offset[$fsr]",
- []>;
-
-//-------------------------
-// Bitwise operations patterns
-//--------------------------
-// W = W op [F]
-let Defs = [STATUS] in {
-def OrFW : BinOpFW<0, "iorwf", or>;
-def XOrFW : BinOpFW<0, "xorwf", xor>;
-def AndFW : BinOpFW<0, "andwf", and>;
-
-// F = W op [F]
-def OrWF : BinOpWF<0, "iorwf", or>;
-def XOrWF : BinOpWF<0, "xorwf", xor>;
-def AndWF : BinOpWF<0, "andwf", and>;
-
-//-------------------------
-// Various add/sub patterns.
-//-------------------------
-
-// W = W + [F]
-def addfw_1: BinOpFW<0, "addwf", add>;
-def addfw_2: BinOpFW<0, "addwf", addc>;
-
-let Uses = [STATUS] in
-def addfwc: BinOpFW<0, "addwfc", adde>; // With Carry.
-
-// F = W + [F]
-def addwf_1: BinOpWF<0, "addwf", add>;
-def addwf_2: BinOpWF<0, "addwf", addc>;
-let Uses = [STATUS] in
-def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry.
-}
-
-// W -= [F] ; load from F and sub the value from W.
-let Constraints = "$src = $dst", mayLoad = 1 in
-class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
- ByteFormat<OpCode, (outs GPR:$dst),
- (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo + $offset, W"),
- [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo,
- (i8 imm:$ptrhi), (i8 imm:$offset)),
- GPR:$src))]>;
-let Defs = [STATUS] in {
-def subfw_1: SUBFW<0, "subwf", sub>;
-def subfw_2: SUBFW<0, "subwf", subc>;
-
-let Uses = [STATUS] in
-def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow.
-
-}
-let Defs = [STATUS], isTerminator = 1 in
-def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
-
-// [F] -= W ;
-let mayStore = 1 in
-class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
- ByteFormat<OpCode, (outs),
- (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
- !strconcat(OpcStr, " $ptrlo + $offset"),
- [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo,
- (i8 imm:$ptrhi), (i8 imm:$offset)),
- GPR:$src), diraddr:$ptrlo,
- (i8 imm:$ptrhi), (i8 imm:$offset))]>;
-
-let Defs = [STATUS] in {
-def subwf_1: SUBWF<0, "subwf", sub>;
-def subwf_2: SUBWF<0, "subwf", subc>;
-
-let Uses = [STATUS] in
- def subwfb: SUBWF<0, "subwfb", sube>; // With Borrow.
-
-def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>;
-}
-
-// addlw
-let Defs = [STATUS] in {
-def addlw_1 : BinOpWL<0, "addlw", add>;
-def addlw_2 : BinOpWL<0, "addlw", addc>;
-
-let Uses = [STATUS] in
-def addlwc : BinOpWL<0, "addlwc", adde>; // With Carry. (Assembler macro).
-
-// bitwise operations involving a literal and w.
-def andlw : BinOpWL<0, "andlw", and>;
-def xorlw : BinOpWL<0, "xorlw", xor>;
-def orlw : BinOpWL<0, "iorlw", or>;
-}
-
-// sublw
-// W = C - W ; sub W from literal. (Without borrow).
-let Constraints = "$src = $dst" in
-class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
- LiteralFormat<opcode, (outs GPR:$dst),
- (ins GPR:$src, i8imm:$literal),
- !strconcat(OpcStr, " $literal"),
- [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
-// subwl
-// W = W - C ; sub literal from W (Without borrow).
-let Constraints = "$src = $dst" in
-class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
- LiteralFormat<opcode, (outs GPR:$dst),
- (ins GPR:$src, i8imm:$literal),
- !strconcat(OpcStr, " $literal"),
- [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
-
-let Defs = [STATUS] in {
-def sublw_1 : SUBLW<0, "sublw", sub>;
-def sublw_2 : SUBLW<0, "sublw", subc>;
-def sublw_3 : SUBLW<0, "sublwb", sube>; // With borrow (Assembler macro).
-
-def sublw_4 : SUBWL<0, "subwl", sub>; // Assembler macro replace with addlw
-def sublw_5 : SUBWL<0, "subwl", subc>; // Assembler macro replace with addlw
-def sublw_6 : SUBWL<0, "subwlb", sube>; // With borrow (Assembler macro).
-}
-let Defs = [STATUS], isTerminator = 1 in
-def sublw_cc : SUBLW<0, "sublw", PIC16Subcc>;
-
-// Call instruction.
-let isCall = 1,
- Defs = [W, FSR0, FSR1] in {
- def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func),
- //"call ${func} + 2",
- "call ${func}",
- [(PIC16call diraddr:$func)]>;
-}
-
-let isCall = 1,
- Defs = [W, FSR0, FSR1] in {
- def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc),
- "callw",
- [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>;
-}
-
-let isCall = 1,
- Defs = [FSR0, FSR1] in {
- def CALLW: LiteralFormat<0x1, (outs GPR:$dest),
- (ins GPR:$func, PCLATHR:$pc),
- "callw",
- [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>;
-}
-
-let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
-def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc),
- "b$cc $dst",
- [(PIC16Brcond bb:$dst, imm:$cc)]>;
-
-// Unconditional branch.
-let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
-def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
- "goto $dst",
- [(br bb:$dst)]>;
-
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
- def SELECT_CC_Int_ICC
- : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
- "; SELECT_CC_Int_ICC PSEUDO!",
- [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F,
- imm:$Cond))]>;
-}
-
-
-// Banksel.
-def banksel :
- Pseudo<(outs),
- (ins i8mem:$ptr),
- "banksel $ptr",
- []>;
-
-def pagesel :
- Pseudo<(outs),
- (ins i8mem:$ptr),
- "movlp $ptr",
- []>;
-
-
-// Return insn.
-let isTerminator = 1, isBarrier = 1, isReturn = 1 in
-def Return :
- ControlFormat<0, (outs), (ins), "return", [(PIC16ret)]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Replacement Patterns.
-//===----------------------------------------------------------------------===//
-
-// Identify an indirect store and select insns for it.
-def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
- imm:$offset),
- (store_indirect GPR:$val,
- (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
- imm:$offset)>;
-
-def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
- imm:$offset),
- (store_indirect GPR:$val,
- (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
- imm:$offset)>;
-
-// Identify an indirect load and select insns for it.
-def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
- imm:$offset),
- (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
- imm:$offset)>;
-
-def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
- imm:$offset),
- (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
- imm:$offset)>;
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.cpp
deleted file mode 100644
index 1bcc497..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- PIC16MCAsmInfo.cpp - PIC16 asm properties -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the PIC16MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16MCAsmInfo.h"
-
-// FIXME: Layering violation to get enums and static function, should be moved
-// to separate headers.
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16ISelLowering.h"
-using namespace llvm;
-
-PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) {
- CommentString = ";";
- GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
- GlobalDirective = "\tglobal\t";
- ExternDirective = "\textern\t";
-
- Data8bitsDirective = " db ";
- Data16bitsDirective = " dw ";
- Data32bitsDirective = " dl ";
- Data64bitsDirective = NULL;
- ZeroDirective = NULL;
- AsciiDirective = " dt ";
- AscizDirective = NULL;
-
- RomData8bitsDirective = " dw ";
- RomData16bitsDirective = " rom_di ";
- RomData32bitsDirective = " rom_dl ";
- HasSetDirective = false;
-
- // Set it to false because we need to generate the c file name and not the
- // bc file name.
- HasSingleParameterDotFile = false;
-}
-
-const char *PIC16MCAsmInfo::getDataASDirective(unsigned Size,
- unsigned AS) const {
- if (AS != PIC16ISD::ROM_SPACE)
- return 0;
-
- switch (Size) {
- case 8: return RomData8bitsDirective;
- case 16: return RomData16bitsDirective;
- case 32: return RomData32bitsDirective;
- default: return NULL;
- }
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.h
deleted file mode 100644
index 6e1c111..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16MCAsmInfo.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//=====-- PIC16MCAsmInfo.h - PIC16 asm properties -------------*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the PIC16MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16TARGETASMINFO_H
-#define PIC16TARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
- class StringRef;
-
- class PIC16MCAsmInfo : public MCAsmInfo {
- const char *RomData8bitsDirective;
- const char *RomData16bitsDirective;
- const char *RomData32bitsDirective;
- public:
- PIC16MCAsmInfo(const Target &T, StringRef TT);
-
- virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
- };
-
-} // namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16MachineFunctionInfo.h
deleted file mode 100644
index bdf5086..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16MachineFunctionInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//====- PIC16MachineFunctionInfo.h - PIC16 machine function info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares PIC16-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16MACHINEFUNCTIONINFO_H
-#define PIC16MACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// PIC16MachineFunctionInfo - This class is derived from MachineFunctionInfo
-/// and contains private PIC16 target-specific information for each
-/// MachineFunction.
-class PIC16MachineFunctionInfo : public MachineFunctionInfo {
- // The frame indexes generated for spill/reload are stack based.
- // This map maintains zero-based indexes for these FIs.
- std::map<unsigned, unsigned> FiTmpOffsetMap;
- unsigned TmpSize;
-
- // These are the frames for return value and argument passing
- // These FrameIndices will be expanded to foo.frame external symbol
- // and all others will be expanded to foo.tmp external symbol.
- unsigned ReservedFrameCount;
-
-public:
- PIC16MachineFunctionInfo()
- : TmpSize(0), ReservedFrameCount(0) {}
-
- explicit PIC16MachineFunctionInfo(MachineFunction &MF)
- : TmpSize(0), ReservedFrameCount(0) {}
-
- std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; }
-
- unsigned getTmpSize() const { return TmpSize; }
- void setTmpSize(unsigned Size) { TmpSize = Size; }
-
- unsigned getReservedFrameCount() const { return ReservedFrameCount; }
- void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16MemSelOpt.cpp b/contrib/llvm/lib/Target/PIC16/PIC16MemSelOpt.cpp
deleted file mode 100644
index b6aa38f..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which optimizes the emitting of banksel
-// instructions before accessing data memory. This currently works within
-// a basic block only and keeps track of the last accessed memory bank.
-// If memory access continues to be in the same bank it just changes the
-// banksel immediate, which is a part of the insn accessing the data memory,
-// from 1 to 0. The asm printer emits a banksel only if that immediate is 1.
-//
-// FIXME: this is not implemented yet. The banksel pass only works on local
-// basic blocks.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-codegen"
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16InstrInfo.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/DerivedTypes.h"
-
-using namespace llvm;
-
-namespace {
- struct MemSelOpt : public MachineFunctionPass {
- static char ID;
- MemSelOpt() : MachineFunctionPass(ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "PIC16 Memsel Optimizer";
- }
-
- bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
- bool processInstruction(MachineInstr *MI);
-
- private:
- const TargetInstrInfo *TII; // Machine instruction info.
- MachineBasicBlock *MBB; // Current basic block
- std::string CurBank;
- int PageChanged;
-
- };
- char MemSelOpt::ID = 0;
-}
-
-FunctionPass *llvm::createPIC16MemSelOptimizerPass() {
- return new MemSelOpt();
-}
-
-
-/// runOnMachineFunction - Loop over all of the basic blocks, optimizing the
-/// emission of banksel/pagesel instructions.
-///
-bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getTarget().getInstrInfo();
- bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
- I != E; ++I) {
- Changed |= processBasicBlock(MF, *I);
- }
-
- return Changed;
-}
-
-/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting or folding banksel/pagesel instructions as needed.
-///
-bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
- bool Changed = false;
- MBB = &BB;
-
- // Let us assume that when entering a basic block no bank is selected.
- // Ideally we should look at the predecessors for this information.
- CurBank="";
- PageChanged=0;
-
- MachineBasicBlock::iterator I;
- for (I = BB.begin(); I != BB.end(); ++I) {
- Changed |= processInstruction(I);
-
- // If the page has changed, insert a pagesel before
- // any instruction that needs one.
- if (PageChanged == 1)
- {
- // Restore the page if it was changed, before leaving the basic block,
- // because it may be required by the goto terminator or the fall thru
- // basic block.
- // If the terminator is return, we don't need to restore since there
- // is no goto or fall thru basic block.
- if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto
- (I->getOpcode() == PIC16::sublw_6) || //macro has goto
- (I->getOpcode() == PIC16::addlwc) || //macro has goto
- (TII->get(I->getOpcode()).isBranch()))
- {
- DebugLoc dl = I->getDebugLoc();
- BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
- Changed = true;
- PageChanged = 0;
- }
- }
- }
-
- // The basic block is over, but if we did not find any goto yet,
- // we haven't restored the page.
- // Restore the page if it was changed, before leaving the basic block,
- // because it may be required by the fall thru basic block.
- // If the terminator is return, we don't need to restore since there
- // is no fall thru basic block.
- if (PageChanged == 1) {
- // save the end pointer before we move back to last insn.
- MachineBasicBlock::iterator J = I;
- I--;
- const TargetInstrDesc &TID = TII->get(I->getOpcode());
- if (! TID.isReturn())
- {
- DebugLoc dl = I->getDebugLoc();
- BuildMI(*MBB, J, dl,
- TII->get(PIC16::pagesel)).addExternalSymbol("$");
- Changed = true;
- PageChanged = 0;
- }
- }
-
-
- return Changed;
-}
-
-bool MemSelOpt::processInstruction(MachineInstr *MI) {
- bool Changed = false;
-
- unsigned NumOperands = MI->getNumOperands();
- if (NumOperands == 0) return false;
-
-
- // If this insn is not going to access any memory, return.
- const TargetInstrDesc &TID = TII->get(MI->getOpcode());
- if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
- return false;
-
- // The first thing we should do is record whether banksel/pagesel are
- // changed in an unknown way. This can happen via any type of call.
- // We do it here first, before scanning for MemOp / BBOp, as the indirect
- // call insns do not have any operands, but they still may change bank/page.
- if (TID.isCall()) {
- // Record that we have changed the page, so that we can restore it
- // before basic block ends.
- // We need to signal that a page and bank change happened even for
- // indirect calls.
- PageChanged = 1;
-
- // When a call is made, there may be banksel for variables in callee.
- // Hence the banksel in caller needs to be reset.
- CurBank = "";
- }
-
- // Scan for the memory address operand.
- // FIXME: Should we use standard interfaces like memoperands_iterator,
- // hasMemOperand() etc ?
- int MemOpPos = -1;
- int BBOpPos = -1;
- for (unsigned i = 0; i < NumOperands; i++) {
- MachineOperand Op = MI->getOperand(i);
- if (Op.getType() == MachineOperand::MO_GlobalAddress ||
- Op.getType() == MachineOperand::MO_ExternalSymbol) {
- // We found one mem operand. Next one may be BS.
- MemOpPos = i;
- }
- if (Op.getType() == MachineOperand::MO_MachineBasicBlock) {
- // We found one BB operand. Next one may be pagesel.
- BBOpPos = i;
- }
- }
-
- // If we did not find an insn accessing memory, continue.
- if ((MemOpPos == -1) &&
- (BBOpPos == -1))
- return false;
- assert ((BBOpPos != MemOpPos) && "operand can only be of one type");
-
-
- // If this insn requires a pagesel, handle it first.
- // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp.
- // Pagesel is required only for a direct call.
- if ((MI->getOpcode() == PIC16::CALL)) {
- // Get the MemOp.
- MachineOperand &MemOp = MI->getOperand(MemOpPos);
- DebugLoc dl = MI->getDebugLoc();
- BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp);
-
- // CALL and br_ucond need only pagesel, so we are done.
- return true;
- }
-
- // Pagesel is handled. Now, add a Banksel if needed.
- if (MemOpPos == -1) return Changed;
- // Get the MemOp.
- MachineOperand &Op = MI->getOperand(MemOpPos);
-
- // Get the section name (NewBank) for MemOp.
- // This assumes that the section names for globals are already set by
- // AsmPrinter->doInitialization.
- std::string NewBank = CurBank;
- bool hasExternalLinkage = false;
- if (Op.getType() == MachineOperand::MO_GlobalAddress &&
- Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
- if (Op.getGlobal()->hasExternalLinkage())
- hasExternalLinkage= true;
- NewBank = Op.getGlobal()->getSection();
- } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) {
- // External Symbol is generated for temp data and arguments. They are
- // in fpdata.<functionname>.# section.
- std::string Sym = Op.getSymbolName();
- NewBank = PAN::getSectionNameForSym(Sym);
- }
-
- // If the section is a shared section, do not emit a banksel.
- if (NewBank == PAN::getSharedUDataSectionName())
- return Changed;
-
- // If the previous and new section names are the same, we don't need to
- // emit a banksel.
- if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) {
- DebugLoc dl = MI->getDebugLoc();
- BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
- addOperand(Op);
- Changed = true;
- CurBank = NewBank;
- }
-
- return Changed;
-}
-
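
The banksel policy this deleted pass implements within a basic block amounts to simple state tracking: remember the bank of the last data-memory access and request a fresh banksel only when the bank changes or becomes unknown (for example across a call). A compiler-agnostic sketch of that policy, with hypothetical types standing in for MachineInstr:

    #include <string>
    #include <vector>

    // Sketch of the per-basic-block bank tracking described above. MemAccess is
    // a stand-in for a machine instruction that touches data memory; an empty
    // Bank models an access whose bank cannot be determined.
    struct MemAccess {
      std::string Bank;  // section/bank of the accessed symbol, "" if unknown
      bool NeedsBanksel; // set below: should the printer emit a banksel?
    };

    static void foldRedundantBanksels(std::vector<MemAccess> &Block) {
      std::string CurBank; // bank is unknown on entry to the block
      for (MemAccess &MA : Block) {
        // Emit a banksel only when the bank is unknown or has changed.
        MA.NeedsBanksel = MA.Bank.empty() || MA.Bank != CurBank;
        CurBank = MA.Bank;
      }
    }
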
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
deleted file mode 100644
index 56f0211..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to clone all functions that are shared between
-// the main line code (ML) and interrupt line code (IL). It clones all such
-// shared functions and their automatic global vars by adding the .IL suffix.
-//
-// This pass is supposed to be run on the linked .bc module.
-// It traverses the module call graph twice: once starting from the main
-// function and marking each reached function as "ML", and again starting from
-// the ISR and cloning any reachable function that was marked as "ML". After
-// cloning a function, it remaps all the call sites in IL functions to call the
-// cloned functions.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "PIC16Cloner.h"
-#include "../PIC16ABINames.h"
-#include <vector>
-
-using namespace llvm;
-using std::vector;
-using std::string;
-using std::map;
-
-namespace llvm {
- char PIC16Cloner::ID = 0;
-
- ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); }
-}
-
-// We currently intend to run these passes in opt, which does not have any
-// diagnostic support. So use these functions for now. In future
-// we will probably write our own driver tool.
-//
-void PIC16Cloner::reportError(string ErrorString) {
- errs() << "ERROR : " << ErrorString << "\n";
- exit(1);
-}
-
-void PIC16Cloner::
-reportError (string ErrorString, vector<string> &Values) {
- unsigned ValCount = Values.size();
- string TargetString;
- for (unsigned i=0; i<ValCount; ++i) {
- TargetString = "%";
- TargetString += ((char)i + '0');
- ErrorString.replace(ErrorString.find(TargetString), TargetString.length(),
- Values[i]);
- }
- errs() << "ERROR : " << ErrorString << "\n";
- exit(1);
-}
-
-
-// Entry point
-//
-bool PIC16Cloner::runOnModule(Module &M) {
- CallGraph &CG = getAnalysis<CallGraph>();
-
- // Search for the "main" and "ISR" functions.
- CallGraphNode *mainCGN = NULL, *isrCGN = NULL;
- for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++)
- {
- // External calling node doesn't have any function associated with it.
- if (! it->first)
- continue;
-
- if (it->first->getName().str() == "main") {
- mainCGN = it->second;
- }
-
- if (PAN::isISR(it->first->getSection())) {
- isrCGN = it->second;
- }
-
- // Don't search further if we've found both.
- if (mainCGN && isrCGN)
- break;
- }
-
- // We have nothing to do if either main or the ISR is missing.
- if (! mainCGN || ! isrCGN) return false;
-
- // Time for some diagnostics.
- // If main itself is an interrupt function then report an error.
- if (PAN::isISR(mainCGN->getFunction()->getSection())) {
- reportError("Function 'main' can't be interrupt function");
- }
-
-
- // Mark all reachable functions from main as ML.
- markCallGraph(mainCGN, "ML");
-
- // And then all the functions reachable from ISR will be cloned.
- cloneSharedFunctions(isrCGN);
-
- return true;
-}
-
-// Mark all reachable functions from the given node, with the given mark.
-//
-void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
- // Mark the top node first.
- Function *thisF = CGN->getFunction();
-
- thisF->setSection(StringMark);
-
- // Mark all the called functions
- for(CallGraphNode::iterator cgn_it = CGN->begin();
- cgn_it != CGN->end(); ++cgn_it) {
- Function *CalledF = cgn_it->second->getFunction();
-
- // If calling an external function then CallGraphNode
- // will not be associated with any function.
- if (! CalledF)
- continue;
-
- // Issue diagnostic if interrupt function is being called.
- if (PAN::isISR(CalledF->getSection())) {
- vector<string> Values;
- Values.push_back(CalledF->getName().str());
- reportError("Interrupt function (%0) can't be called", Values);
- }
-
- // Has already been marked.
- if (CalledF->getSection().find(StringMark) != string::npos) {
- // Should we do anything here?
- } else {
- // Mark now
- CalledF->setSection(StringMark);
- }
-
- // Before going any further, mark all the functions called by the current
- // function.
- markCallGraph(cgn_it->second ,StringMark);
- } // end of loop of all called functions.
-}
-
-
-// For PIC16, automatic variables of a function are emitted as globals.
-// Clone the auto variables of a function and put them in VMap;
-// this VMap will be used while cloning the code of the function itself.
-//
-void PIC16Cloner::CloneAutos(Function *F) {
- // We'll need to update module's globals list as well. So keep a reference
- // handy.
- Module *M = F->getParent();
- Module::GlobalListType &Globals = M->getGlobalList();
-
- // Clear the leftovers in VMap by any previous cloning.
- VMap.clear();
-
- // Find the auto globals for this function, clone them, and put them
- // in VMap.
- std::string FnName = F->getName().str();
- std::string VarName, ClonedVarName;
- for (Module::global_iterator I = M->global_begin(), E = M->global_end();
- I != E; ++I) {
- VarName = I->getName().str();
- if (PAN::isLocalToFunc(FnName, VarName)) {
- // Auto variable for current function found. Clone it.
- const GlobalVariable *GV = I;
-
- const Type *InitTy = GV->getInitializer()->getType();
- GlobalVariable *ClonedGV =
- new GlobalVariable(InitTy, false, GV->getLinkage(),
- GV->getInitializer());
- ClonedGV->setName(PAN::getCloneVarName(FnName, VarName));
- // Add these new globals to module's globals list.
- Globals.push_back(ClonedGV);
-
- // Update VMap.
- VMap[GV] = ClonedGV;
- }
- }
-}
-
-
-// Clone all functions that are reachable from ISR and are already
-// marked as ML.
-//
-void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
-
- // Check all the called functions from ISR.
- for(CallGraphNode::iterator cgn_it = CGN->begin();
- cgn_it != CGN->end(); ++cgn_it) {
- Function *CalledF = cgn_it->second->getFunction();
-
- // If calling an external function then CallGraphNode
- // will not be associated with any function.
- if (!CalledF)
- continue;
-
- // Issue diagnostic if interrupt function is being called.
- if (PAN::isISR(CalledF->getSection())) {
- vector<string> Values;
- Values.push_back(CalledF->getName().str());
- reportError("Interrupt function (%0) can't be called", Values);
- }
-
- if (CalledF->getSection().find("ML") != string::npos) {
- // The function is alternatively marked; it should be a shared one.
- // Create an IL copy, passing the called function as the first argument
- // and the caller as the second argument.
-
- // Before making the IL copy, first ensure that this function has a
- // body. If the function does not have a body, it can't be cloned.
- // Such a case may occur when the function has been declared
- // in the C source code but its body exists in an assembly file.
- if (!CalledF->isDeclaration()) {
- Function *cf = cloneFunction(CalledF);
- remapAllSites(CGN->getFunction(), CalledF, cf);
- } else {
- // It is called only from the ISR. Still mark it, as we need this info
- // in code gen while calling intrinsics; the function is not cloned.
- CalledF->setSection("IL");
- }
- }
- // Before going any further, clone all the shared functions reachable
- // from the current function.
- cloneSharedFunctions(cgn_it->second);
- } // end of loop of all called functions.
-}
-
-// Clone the given function and return it.
-// Note: it uses the VMap member of the class, which is already populated
-// by CloneAutos by the time we reach here.
-// FIXME: Should we just pass VMap's ref as a parameter here, rather
-// than keeping the VMap as a member?
-Function *
-PIC16Cloner::cloneFunction(Function *OrgF) {
- Function *ClonedF;
-
- // See if we already cloned it. Return that.
- cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF);
- if(cm_it != ClonedFunctionMap.end()) {
- ClonedF = cm_it->second;
- return ClonedF;
- }
-
- // Clone does not exist.
- // First clone the autos, and populate VMap.
- CloneAutos(OrgF);
-
- // Now create the clone.
- ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
-
- // The new function should be for interrupt line. Therefore should have
- // the name suffixed with IL and section attribute marked with IL.
- ClonedF->setName(PAN::getCloneFnName(OrgF->getName()));
- ClonedF->setSection("IL");
-
- // Add the newly created function to the module.
- OrgF->getParent()->getFunctionList().push_back(ClonedF);
-
- // Update the ClonedFunctionMap to record this cloning activity.
- ClonedFunctionMap[OrgF] = ClonedF;
-
- return ClonedF;
-}
-
-
-// Remap the call sites of shared functions, that are in IL.
-// Change the IL call site of a shared function to its clone.
-//
-void PIC16Cloner::
-remapAllSites(Function *Caller, Function *OrgF, Function *Clone) {
-  // First find the caller to update. If the caller itself is cloned,
-  // then use the cloned caller; otherwise use the original.
- cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller);
- if (cm_it != ClonedFunctionMap.end())
- Caller = cm_it->second;
-
-  // For lack of a better call-site finding mechanism, iterate over
-  // all instructions to find uses of the original function.
- for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) {
- BasicBlock &BB = *BI;
- for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
- if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF)
- II->setOperand(0, Clone);
- }
- }
-}
-
-
-
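The cloner above memoizes clones in ClonedFunctionMap and then rewrites the caller's call sites to target the clone. A rough standalone C++ sketch of that memoize-then-remap pattern, using toy types and hypothetical names rather than the LLVM API:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for a function: a name plus the names of the callees it calls.
struct Fn {
  std::string Name;
  std::vector<std::string> Calls;
};

// Clone OrgF at most once, suffixing ".IL"; later requests reuse the clone.
static Fn *cloneFunction(Fn *OrgF, std::map<Fn *, Fn *> &Cloned,
                         std::vector<Fn *> &Module) {
  std::map<Fn *, Fn *>::iterator It = Cloned.find(OrgF);
  if (It != Cloned.end())
    return It->second;
  Fn *ClonedF = new Fn(*OrgF);
  ClonedF->Name += ".IL";
  Module.push_back(ClonedF);      // add the clone to the "module"
  Cloned[OrgF] = ClonedF;
  return ClonedF;
}

// Redirect every call to OrgF inside Caller so it targets Clone instead.
static void remapAllSites(Fn *Caller, const Fn *OrgF, const Fn *Clone) {
  for (size_t i = 0; i < Caller->Calls.size(); ++i)
    if (Caller->Calls[i] == OrgF->Name)
      Caller->Calls[i] = Clone->Name;
}

int main() {
  Fn Shared; Shared.Name = "helper";
  Fn ISR;    ISR.Name = "isr"; ISR.Calls.push_back("helper");
  std::map<Fn *, Fn *> Cloned;
  std::vector<Fn *> Module;
  Fn *Clone = cloneFunction(&Shared, Cloned, Module);
  remapAllSites(&ISR, &Shared, Clone);
  std::printf("isr now calls %s\n", ISR.Calls[0].c_str());
  for (size_t i = 0; i < Module.size(); ++i) delete Module[i];
  return 0;
}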
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
deleted file mode 100644
index e7d67ce..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ /dev/null
@@ -1,83 +0,0 @@
-//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of a cloner class that clones all
-// functions shared between the main line code (ML) and interrupt line
-// code (IL).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16CLONER_H
-#define PIC16CLONER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-using namespace llvm;
-using std::vector;
-using std::string;
-using std::map;
-
-namespace llvm {
- // forward classes.
- class Value;
- class Function;
- class Module;
- class ModulePass;
- class CallGraph;
- class CallGraphNode;
- class AnalysisUsage;
-
- class PIC16Cloner : public ModulePass {
- public:
- static char ID; // Class identification
- PIC16Cloner() : ModulePass(ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<CallGraph>();
- }
- virtual bool runOnModule(Module &M);
-
- private: // Functions
- // Mark reachable functions for the MainLine or InterruptLine.
- void markCallGraph(CallGraphNode *CGN, string StringMark);
-
- // Clone auto variables of function specified.
- void CloneAutos(Function *F);
-
- // Clone the body of a function.
- Function *cloneFunction(Function *F);
-
- // Clone all shared functions.
- void cloneSharedFunctions(CallGraphNode *isrCGN);
-
- // Remap all call sites to the shared function.
- void remapAllSites(Function *Caller, Function *OrgF, Function *Clone);
-
- // Error reporting for PIC16Pass
- void reportError(string ErrorString, vector<string> &Values);
- void reportError(string ErrorString);
-
- private: //data
- // Records if the interrupt function has already been found.
- // If more than one interrupt function is found then an error
- // should be thrown.
- bool foundISR;
-
- // This ValueMap maps the auto variables of the original functions with
- // the corresponding cloned auto variable of the cloned function.
- // This value map is passed during the function cloning so that all the
- // uses of auto variables be updated properly.
- ValueMap<const Value*, Value*> VMap;
-
-    // Map of already cloned functions.
- map<Function *, Function *> ClonedFunctionMap;
- typedef map<Function *, Function *>::iterator cloned_map_iterator;
- };
-} // End of llvm namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
deleted file mode 100644
index 0f8928a..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===-- PIC16Overlay.cpp - Implementation for PIC16 Frame Overlay ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Frame Overlay implementation.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Value.h"
-#include "PIC16Overlay.h"
-#include "llvm/Function.h"
-#include <cstdlib>
-#include <sstream>
-using namespace llvm;
-
-namespace llvm {
- char PIC16Overlay::ID = 0;
- ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); }
-}
-
-void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<CallGraph>();
-}
-
-void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
- // Do not set any color for external calling node.
- if (Depth != 0 && CGN->getFunction()) {
- unsigned Color = getColor(CGN->getFunction());
-
- // Handle indirectly called functions
- if (Color >= PIC16OVERLAY::StartIndirectCallColor ||
- Depth >= PIC16OVERLAY::StartIndirectCallColor) {
- // All functions called from an indirectly called function are given
-      // a unique color.
- if (Color < PIC16OVERLAY::StartIndirectCallColor &&
- Depth >= PIC16OVERLAY::StartIndirectCallColor)
- setColor(CGN->getFunction(), Depth);
-
- for (unsigned int i = 0; i < CGN->size(); i++)
- DFSTraverse((*CGN)[i], ++IndirectCallColor);
- return;
- }
- // Just return if the node already has a color greater than the current
- // depth. A node must be colored with the maximum depth that it has.
- if (Color >= Depth)
- return;
-
- Depth = ModifyDepthForInterrupt(CGN, Depth);
- setColor(CGN->getFunction(), Depth);
- }
-
- // Color all children of this node with color depth+1.
- for (unsigned int i = 0; i < CGN->size(); i++)
- DFSTraverse((*CGN)[i], Depth+1);
-}
-
-unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
- unsigned Depth) {
- Function *Fn = CGN->getFunction();
-
-  // Return the original Depth if the function or its section does not exist.
- if (!Fn || !Fn->hasSection())
- return Depth;
-
- // Return original Depth if this function is not marked as interrupt.
- if (Fn->getSection().find("interrupt") == string::npos)
- return Depth;
-
- Depth = Depth + InterruptDepth;
- return Depth;
-}
-
-void PIC16Overlay::setColor(Function *Fn, unsigned Color) {
- std::string Section = "";
- if (Fn->hasSection())
- Section = Fn->getSection();
-
- size_t Pos = Section.find(OverlayStr);
-
- // Convert Color to string.
- std::stringstream ss;
- ss << Color;
- std::string ColorString = ss.str();
-
- // If color is already set then reset it with the new value. Else append
- // the Color string to section.
- if (Pos != std::string::npos) {
- Pos += OverlayStr.length();
- char c = Section.at(Pos);
- unsigned OldColorLength = 0;
- while (c >= '0' && c<= '9') {
- OldColorLength++;
- if (Pos < Section.length() - 1)
- Pos++;
- else
- break;
- c = Section.at(Pos);
- }
- // Replace old color with new one.
- Section.replace(Pos-OldColorLength +1, OldColorLength, ColorString);
- }
- else {
- // Append Color information to section string.
- if (Fn->hasSection())
- Section.append(" ");
- Section.append(OverlayStr + ColorString);
- }
- Fn->setSection(Section);
-}
-
-unsigned PIC16Overlay::getColor(Function *Fn) {
- int Color = 0;
- if (!Fn->hasSection())
- return 0;
-
- std::string Section = Fn->getSection();
- size_t Pos = Section.find(OverlayStr);
-
- // Return 0 if Color is not set.
- if (Pos == std::string::npos)
- return 0;
-
- // Set Pos to after "Overlay=".
- Pos += OverlayStr.length();
- char c = Section.at(Pos);
- std::string ColorString = "";
-
- // Find the string representing Color. A Color can only consist of digits.
- while (c >= '0' && c<= '9') {
- ColorString.append(1,c);
- if (Pos < Section.length() - 1)
- Pos++;
- else
- break;
- c = Section.at(Pos);
- }
- Color = atoi(ColorString.c_str());
-
- return Color;
-}
-
-bool PIC16Overlay::runOnModule(Module &M) {
- CallGraph &CG = getAnalysis<CallGraph>();
- CallGraphNode *ECN = CG.getExternalCallingNode();
-
- MarkIndirectlyCalledFunctions(M);
- // Since External Calling Node is the base function, do a depth first
-  // traversal of CallGraph with ECN as root. Each node will be marked with
- // a color that is max(color(callers)) + 1.
- if(ECN) {
- DFSTraverse(ECN, 0);
- }
- return false;
-}
-
-void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
- // If the use of a function is not a call instruction then this
- // function might be called indirectly. In that case give it
-  // a unique color.
- for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
- for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
- ++I) {
- User *U = *I;
- if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
- || !CallSite(cast<Instruction>(U)).isCallee(I)) {
- setColor(MI, ++IndirectCallColor);
- break;
- }
- }
- }
-}
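getColor and setColor above keep the overlay color as a textual "Overlay=<N>" marker inside the function's section string. A standalone sketch of that encoding using only the standard library; it is simplified (setColor here drops anything that follows an existing marker) and the names are assumptions, not the LLVM code:

#include <cctype>
#include <cstdio>
#include <string>

static const std::string OverlayStr = "Overlay=";

// Return the color encoded in Section, or 0 if no marker is present.
static unsigned getColor(const std::string &Section) {
  std::string::size_type Pos = Section.find(OverlayStr);
  if (Pos == std::string::npos)
    return 0;
  Pos += OverlayStr.length();
  std::string Digits;
  while (Pos < Section.length() &&
         std::isdigit(static_cast<unsigned char>(Section[Pos])))
    Digits += Section[Pos++];
  return Digits.empty() ? 0 : static_cast<unsigned>(std::stoul(Digits));
}

// Append a marker, or replace an existing one (and whatever follows it).
static std::string setColor(std::string Section, unsigned Color) {
  std::string::size_type Pos = Section.find(OverlayStr);
  if (Pos != std::string::npos)
    Section.erase(Pos);
  if (!Section.empty() && Section[Section.length() - 1] != ' ')
    Section += ' ';
  return Section + OverlayStr + std::to_string(Color);
}

int main() {
  std::string S = setColor("fn.section UDATA_OVR", 201);
  std::printf("\"%s\" -> color %u\n", S.c_str(), getColor(S));
  return 0;
}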
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
deleted file mode 100644
index 2f611e6..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Overlay infrastructure.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16FRAMEOVERLAY_H
-#define PIC16FRAMEOVERLAY_H
-
-
-using std::string;
-using namespace llvm;
-
-namespace llvm {
- // Forward declarations.
- class Function;
- class Module;
- class ModulePass;
- class AnalysisUsage;
- class CallGraphNode;
- class CallGraph;
-
- namespace PIC16OVERLAY {
- enum OverlayConsts {
- StartInterruptColor = 200,
- StartIndirectCallColor = 300
- };
- }
- class PIC16Overlay : public ModulePass {
- std::string OverlayStr;
- unsigned InterruptDepth;
- unsigned IndirectCallColor;
- public:
- static char ID; // Class identification
- PIC16Overlay() : ModulePass(ID) {
- OverlayStr = "Overlay=";
- InterruptDepth = PIC16OVERLAY::StartInterruptColor;
- IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnModule(Module &M);
-
- private:
- unsigned getColor(Function *Fn);
- void setColor(Function *Fn, unsigned Color);
- unsigned ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth);
- void MarkIndirectlyCalledFunctions(Module &M);
- void DFSTraverse(CallGraphNode *CGN, unsigned Depth);
- };
-} // End of namespace
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.cpp b/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.cpp
deleted file mode 100644
index 76de47f..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-reg-info"
-
-#include "PIC16.h"
-#include "PIC16RegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii,
- const PIC16Subtarget &st)
- : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP),
- TII(tii),
- ST(st) {}
-
-#include "PIC16GenRegisterInfo.inc"
-
-/// PIC16 Callee Saved Registers
-const unsigned* PIC16RegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
- static const unsigned CalleeSavedRegs[] = { 0 };
- return CalleeSavedRegs;
-}
-
-BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- return Reserved;
-}
-
-bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
- return false;
-}
-
-void PIC16RegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- RegScavenger *RS) const
-{ /* NOT YET IMPLEMENTED */ }
-
-void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
-{ /* NOT YET IMPLEMENTED */ }
-
-void PIC16RegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
-{ /* NOT YET IMPLEMENTED */ }
-
-int PIC16RegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
- llvm_unreachable("Not keeping track of debug information yet!!");
- return -1;
-}
-
-unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- llvm_unreachable("PIC16 Does not have any frame register");
- return 0;
-}
-
-unsigned PIC16RegisterInfo::getRARegister() const {
- llvm_unreachable("PIC16 Does not have any return address register");
- return 0;
-}
-
-// This function eliminates ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
-void PIC16RegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- // Simply discard ADJCALLSTACKDOWN,
- // ADJCALLSTACKUP instructions.
- MBB.erase(I);
-}
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.h
deleted file mode 100644
index 20052b0..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.h
+++ /dev/null
@@ -1,64 +0,0 @@
-//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16REGISTERINFO_H
-#define PIC16REGISTERINFO_H
-
-#include "PIC16GenRegisterInfo.h.inc"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
-
-// Forward Declarations.
- class PIC16Subtarget;
- class TargetInstrInfo;
-
-class PIC16RegisterInfo : public PIC16GenRegisterInfo {
- private:
- const TargetInstrInfo &TII;
- const PIC16Subtarget &ST;
-
- public:
- PIC16RegisterInfo(const TargetInstrInfo &tii,
- const PIC16Subtarget &st);
-
-
- //------------------------------------------------------
- // Pure virtual functions from TargetRegisterInfo
- //------------------------------------------------------
-
- // PIC16 callee saved registers
- virtual const unsigned*
- getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool hasFP(const MachineFunction &MF) const;
-
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, RegScavenger *RS=NULL) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- virtual unsigned getFrameRegister(const MachineFunction &MF) const;
- virtual unsigned getRARegister() const;
-
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.td b/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.td
deleted file mode 100644
index 2959d91..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16RegisterInfo.td
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PIC16 register file
-//===----------------------------------------------------------------------===//
-
-class PIC16Reg<string n> : Register<n> {
- let Namespace = "PIC16";
-}
-
-// PIC16 Registers.
-def W : PIC16Reg<"W">;
-def FSR0 : PIC16Reg<"FSR0">;
-def FSR1 : PIC16Reg<"FSR1">;
-def BS : PIC16Reg<"BS">;
-def PCLATH : PIC16Reg<"PCLATH">;
-
-def STATUS : PIC16Reg<"STATUS">;
-
-// PIC16 Register classes.
-def GPR : RegisterClass<"PIC16", [i8], 8, [W]>;
-def FSR16 : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>;
-def BSR : RegisterClass<"PIC16", [i8], 8, [BS]>;
-def PCLATHR : RegisterClass<"PIC16", [i8], 8, [PCLATH]>;
-def STATUSR : RegisterClass<"PIC16", [i8], 8, [STATUS]>;
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Section.cpp b/contrib/llvm/lib/Target/PIC16/PIC16Section.cpp
deleted file mode 100644
index 2505b11..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Section.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- PIC16Section.cpp - PIC16 Section ---------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16Section.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-
-// This is the only way to create a PIC16Section. Sections created here
-// do not need to be explicitly deleted as they are managed by auto_ptrs.
-PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty,
- StringRef Address, int Color,
- MCContext &Ctx) {
-
- /// Determine the internal SectionKind info.
- /// Users of PIC16Section class should not need to know the internal
- /// SectionKind. They should work only with PIC16SectionType.
- ///
- /// PIC16 Terminology for section kinds is as below.
- /// UDATA - BSS
- /// IDATA - initialized data (equiv to Metadata)
- /// ROMDATA - ReadOnly.
-  /// UDATA_OVR - Sections that can be overlaid. A section of this type is
-  ///             used to contain function autos and frame. We can think of
-  ///             it as equivalent to llvm ThreadBSS.
- /// UDATA_SHR - Shared RAM. Memory area that is mapped to all banks.
-
- SectionKind K;
- switch (Ty) {
- default: llvm_unreachable ("can not create unknown section type");
- case UDATA_OVR: {
- K = SectionKind::getThreadBSS();
- break;
- }
- case UDATA_SHR:
- case UDATA: {
- K = SectionKind::getBSS();
- break;
- }
- case ROMDATA:
- case IDATA: {
- K = SectionKind::getMetadata();
- break;
- }
- case CODE: {
- K = SectionKind::getText();
- break;
- }
-
- }
-
- // Copy strings into context allocated memory so they get free'd when the
- // context is destroyed.
- char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1));
- memcpy(NameCopy, Name.data(), Name.size());
- char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1));
- memcpy(AddressCopy, Address.data(), Address.size());
-
- // Create the Section.
- PIC16Section *S =
- new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K,
- StringRef(AddressCopy, Address.size()), Color);
- S->T = Ty;
- return S;
-}
-
-// A generic way to print all types of sections.
-void PIC16Section::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
-
-  // If the section is overlaid (i.e. it has a color), print the overlay
-  // name for it. Otherwise print its normal name.
- if (Color != -1)
- OS << PAN::getOverlayName(getName(), Color) << '\t';
- else
- OS << getName() << '\t';
-
- // Print type.
- switch (getType()) {
- default : llvm_unreachable ("unknown section type");
- case UDATA: OS << "UDATA"; break;
- case IDATA: OS << "IDATA"; break;
- case ROMDATA: OS << "ROMDATA"; break;
- case UDATA_SHR: OS << "UDATA_SHR"; break;
- case UDATA_OVR: OS << "UDATA_OVR"; break;
- case CODE: OS << "CODE"; break;
- }
-
- OS << '\t';
-
- // Print Address.
- OS << Address;
-
- OS << '\n';
-}
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Section.h b/contrib/llvm/lib/Target/PIC16/PIC16Section.h
deleted file mode 100644
index 5b33b51..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Section.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- PIC16Section.h - PIC16-specific section representation -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16Section class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_PIC16SECTION_H
-#define LLVM_PIC16SECTION_H
-
-#include "llvm/MC/MCSection.h"
-#include "llvm/GlobalVariable.h"
-#include <vector>
-
-namespace llvm {
- /// PIC16Section - Represents a physical section in PIC16 COFF.
- /// Contains data objects.
- ///
- class PIC16Section : public MCSection {
-    /// PIC16 sections do not really use the SectionKind class to
-    /// distinguish between various types of sections. PIC16 maintains
-    /// its own Section Type info. See the PIC16SectionType enum in PIC16.h
- /// for various section types.
- PIC16SectionType T;
-
- /// Name of the section to uniquely identify it.
- StringRef Name;
-
- /// User can specify an address at which a section should be placed.
- /// Negative value here means user hasn't specified any.
- StringRef Address;
-
- /// Overlay information - Sections with same color can be overlaid on
- /// one another.
- int Color;
-
- /// Total size of all data objects contained here.
- unsigned Size;
-
- PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
- : MCSection(SV_PIC16, K), Name(name), Address(addr),
- Color(color), Size(0) {
- }
-
- public:
- /// Return the name of the section.
- StringRef getName() const { return Name; }
-
- /// Return the Address of the section.
- StringRef getAddress() const { return Address; }
-
- /// Return the Color of the section.
- int getColor() const { return Color; }
- void setColor(int color) { Color = color; }
-
- /// Return the size of the section.
- unsigned getSize() const { return Size; }
- void setSize(unsigned size) { Size = size; }
-
-    /// Contained data objects.
- // FIXME: This vector is leaked because sections are allocated with a
- // BumpPtrAllocator.
- std::vector<const GlobalVariable *>Items;
-
- /// Check section type.
- bool isUDATA_Type() const { return T == UDATA; }
- bool isIDATA_Type() const { return T == IDATA; }
- bool isROMDATA_Type() const { return T == ROMDATA; }
- bool isUDATA_OVR_Type() const { return T == UDATA_OVR; }
- bool isUDATA_SHR_Type() const { return T == UDATA_SHR; }
- bool isCODE_Type() const { return T == CODE; }
-
- PIC16SectionType getType() const { return T; }
-
- /// This would be the only way to create a section.
- static PIC16Section *Create(StringRef Name, PIC16SectionType Ty,
- StringRef Address, int Color,
- MCContext &Ctx);
-
- /// Override this as PIC16 has its own way of printing switching
- /// to a section.
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const;
-
- static bool classof(const MCSection *S) {
- return S->getVariant() == SV_PIC16;
- }
- static bool classof(const PIC16Section *) { return true; }
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
deleted file mode 100644
index 995955a..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PIC16SelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-selectiondag-info"
-#include "PIC16TargetMachine.h"
-using namespace llvm;
-
-PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
-}
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.h
deleted file mode 100644
index c67fd8b..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16SelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PIC16 subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16SELECTIONDAGINFO_H
-#define PIC16SELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class PIC16TargetMachine;
-
-class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM);
- ~PIC16SelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.cpp b/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.cpp
deleted file mode 100644
index 33fc3fb..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PIC16 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16Subtarget.h"
-#include "PIC16GenSubtarget.inc"
-
-using namespace llvm;
-
-PIC16Subtarget::PIC16Subtarget(const std::string &TT, const std::string &FS,
- bool Cooper)
- :IsCooper(Cooper)
-{
- std::string CPU = "generic";
-
- // Parse features string.
- ParseSubtargetFeatures(FS, CPU);
-}
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.h b/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.h
deleted file mode 100644
index 81e3783..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16Subtarget.h
+++ /dev/null
@@ -1,44 +0,0 @@
-//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16SUBTARGET_H
-#define PIC16SUBTARGET_H
-
-#include "llvm/Target/TargetSubtarget.h"
-
-#include <string>
-
-namespace llvm {
-
-class PIC16Subtarget : public TargetSubtarget {
-
- // IsCooper - Target ISA is Cooper.
- bool IsCooper;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ///
- PIC16Subtarget(const std::string &TT, const std::string &FS, bool Cooper);
-
- /// isCooper - Returns true if the target ISA is Cooper.
- bool isCooper() const { return IsCooper; }
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
-};
-} // End llvm namespace
-
-#endif // PIC16SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.cpp b/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.cpp
deleted file mode 100644
index 82b69be..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializePIC16Target() {
-  // Register the target. Currently the codegen works for
- // enhanced pic16 mid-range.
- RegisterTargetMachine<PIC16TargetMachine> X(ThePIC16Target);
- RegisterAsmInfo<PIC16MCAsmInfo> A(ThePIC16Target);
-}
-
-
-// PIC16TargetMachine - Enhanced PIC16 mid-range Machine. May also represent
-// a Traditional Machine if 'Trad' is true.
-PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool Trad)
-: LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, Trad),
- DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
-
-
-bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // Install an instruction selector.
- PM.add(createPIC16ISelDag(*this));
- return false;
-}
-
-bool PIC16TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createPIC16MemSelOptimizerPass());
- return true; // -print-machineinstr should print after this.
-}
-
-
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.h b/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.h
deleted file mode 100644
index dae5d31..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16TargetMachine.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- PIC16TargetMachine.h - Define TargetMachine for PIC16 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef PIC16_TARGETMACHINE_H
-#define PIC16_TARGETMACHINE_H
-
-#include "PIC16InstrInfo.h"
-#include "PIC16ISelLowering.h"
-#include "PIC16SelectionDAGInfo.h"
-#include "PIC16RegisterInfo.h"
-#include "PIC16Subtarget.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-/// PIC16TargetMachine
-///
-class PIC16TargetMachine : public LLVMTargetMachine {
- PIC16Subtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- PIC16InstrInfo InstrInfo;
- PIC16TargetLowering TLInfo;
- PIC16SelectionDAGInfo TSInfo;
-
-  // PIC16 does not have any call stack frame, therefore there is no
-  // PIC16-specific FrameInfo class.
- TargetFrameInfo FrameInfo;
-
-public:
- PIC16TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool Cooper = false);
-
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetData *getTargetData() const { return &DataLayout;}
- virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; }
-
- virtual const PIC16RegisterInfo *getRegisterInfo() const {
- return &(InstrInfo.getRegisterInfo());
- }
-
- virtual const PIC16TargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-}; // PIC16TargetMachine.
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.cpp
deleted file mode 100644
index ff0f971..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.cpp
+++ /dev/null
@@ -1,384 +0,0 @@
-//===-- PIC16TargetObjectFile.cpp - PIC16 object files --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16TargetObjectFile.h"
-#include "PIC16TargetMachine.h"
-#include "PIC16Section.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-
-PIC16TargetObjectFile::PIC16TargetObjectFile() {
-}
-
-PIC16TargetObjectFile::~PIC16TargetObjectFile() {
-}
-
-/// Find a pic16 section. Return null if not found. Do not create one.
-PIC16Section *PIC16TargetObjectFile::
-findPIC16Section(const std::string &Name) const {
- /// Return if we have an already existing one.
- PIC16Section *Entry = SectionsByName[Name];
- if (Entry)
- return Entry;
-
- return NULL;
-}
-
-
-/// Find a pic16 section. If not found, create one.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16Section(const std::string &Name, PIC16SectionType Ty,
- const std::string &Address, int Color) const {
-
- /// Return if we have an already existing one.
- PIC16Section *&Entry = SectionsByName[Name];
- if (Entry)
- return Entry;
-
-
- Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
- return Entry;
-}
-
-/// Find a standard pic16 data section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16DataSection(const std::string &Name, PIC16SectionType Ty,
- const std::string &Address, int Color) const {
-
- /// Return if we have an already existing one.
- PIC16Section *&Entry = SectionsByName[Name];
- if (Entry)
- return Entry;
-
-
- /// Else create a new one and add it to appropriate section list.
- Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
- switch (Ty) {
-  default: llvm_unreachable ("unknown standard section type.");
- case UDATA: UDATASections_.push_back(Entry); break;
- case IDATA: IDATASections_.push_back(Entry); break;
- case ROMDATA: ROMDATASection_ = Entry; break;
- case UDATA_SHR: SHAREDUDATASection_ = Entry; break;
- }
-
- return Entry;
-}
-
-
-/// Find a standard pic16 autos section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16AutoSection(const std::string &Name, PIC16SectionType Ty,
- const std::string &Address, int Color) const {
-
- /// Return if we have an already existing one.
- PIC16Section *&Entry = SectionsByName[Name];
- if (Entry)
- return Entry;
-
-
- /// Else create a new one and add it to appropriate section list.
- Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
- assert (Ty == UDATA_OVR && "incorrect section type for autos");
- AUTOSections_.push_back(Entry);
-
- return Entry;
-}
-
-/// Find a pic16 user section. If not found, create one and keep
-/// track of it by adding it to appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16UserSection(const std::string &Name, PIC16SectionType Ty,
- const std::string &Address, int Color) const {
-
- /// Return if we have an already existing one.
- PIC16Section *&Entry = SectionsByName[Name];
- if (Entry)
- return Entry;
-
-
- /// Else create a new one and add it to appropriate section list.
- Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
- USERSections_.push_back(Entry);
-
- return Entry;
-}
-
-/// Do some standard initialization.
-void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
- TargetLoweringObjectFile::Initialize(Ctx, tm);
- TM = &tm;
-
- ROMDATASection_ = NULL;
- SHAREDUDATASection_ = NULL;
-}
-
-/// allocateUDATA - Allocate an uninitialized global to an existing or new UDATA
-/// section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const {
- assert(GV->hasInitializer() && "This global doesn't need space");
- const Constant *C = GV->getInitializer();
-  assert(C->isNullValue() && "Uninitialized global has non-zero initializer");
-
- // Find how much space this global needs.
- const TargetData *TD = TM->getTargetData();
- const Type *Ty = C->getType();
- unsigned ValSize = TD->getTypeAllocSize(Ty);
-
- // Go through all UDATA Sections and assign this variable
- // to the first available section having enough space.
- PIC16Section *Found = NULL;
- for (unsigned i = 0; i < UDATASections_.size(); i++) {
- if (DataBankSize - UDATASections_[i]->getSize() >= ValSize) {
- Found = UDATASections_[i];
- break;
- }
- }
-
-  // No UDATA section spacious enough was found. Create a new one.
- if (!Found) {
- std::string name = PAN::getUdataSectionName(UDATASections_.size());
- Found = getPIC16DataSection(name.c_str(), UDATA);
- }
-
- // Insert the GV into this UDATA section.
- Found->Items.push_back(GV);
- Found->setSize(Found->getSize() + ValSize);
- return Found;
-}
-
-/// allocateIDATA - allocate an initialized global into an existing
-/// or new section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{
- assert(GV->hasInitializer() && "This global doesn't need space");
- const Constant *C = GV->getInitializer();
-  assert(!C->isNullValue() && "Initialized global has zero initializer");
- assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
- "can allocate initialized RAM data only");
-
- // Find how much space this global needs.
- const TargetData *TD = TM->getTargetData();
- const Type *Ty = C->getType();
- unsigned ValSize = TD->getTypeAllocSize(Ty);
-
- // Go through all IDATA Sections and assign this variable
- // to the first available section having enough space.
- PIC16Section *Found = NULL;
- for (unsigned i = 0; i < IDATASections_.size(); i++) {
- if (DataBankSize - IDATASections_[i]->getSize() >= ValSize) {
- Found = IDATASections_[i];
- break;
- }
- }
-
-  // No IDATA section spacious enough was found. Create a new one.
- if (!Found) {
- std::string name = PAN::getIdataSectionName(IDATASections_.size());
- Found = getPIC16DataSection(name.c_str(), IDATA);
- }
-
- // Insert the GV into this IDATA.
- Found->Items.push_back(GV);
- Found->setSize(Found->getSize() + ValSize);
- return Found;
-}
-
-// Allocate a program memory variable into ROMDATA section.
-const MCSection *
-PIC16TargetObjectFile::allocateROMDATA(const GlobalVariable *GV) const {
-
- std::string name = PAN::getRomdataSectionName();
- PIC16Section *S = getPIC16DataSection(name.c_str(), ROMDATA);
-
- S->Items.push_back(GV);
- return S;
-}
-
-// Get the section for an automatic variable of a function.
-// For PIC16 these are just globals with mangled names.
-const MCSection *
-PIC16TargetObjectFile::allocateAUTO(const GlobalVariable *GV) const {
-
- const std::string name = PAN::getSectionNameForSym(GV->getName());
- PIC16Section *S = getPIC16AutoSection(name.c_str());
-
- S->Items.push_back(GV);
- return S;
-}
-
-
-// Override default implementation to put the true globals into
-// multiple data sections if required.
-const MCSection *
-PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1,
- SectionKind Kind,
- Mangler *Mang,
- const TargetMachine &TM) const {
- // We select the section based on the initializer here, so it really
- // has to be a GlobalVariable.
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
- if (!GV)
- return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang,TM);
-
- assert(GV->hasInitializer() && "A def without initializer?");
-
- // First, if this is an automatic variable for a function, get the section
- // name for it and return.
- std::string name = GV->getName();
- if (PAN::isLocalName(name))
- return allocateAUTO(GV);
-
- // See if this is an uninitialized global.
- const Constant *C = GV->getInitializer();
- if (C->isNullValue())
- return allocateUDATA(GV);
-
-  // If this is initialized data in RAM, put it in the correct IDATA section.
- if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
- return allocateIDATA(GV);
-
- // This is initialized data in rom, put it in the readonly section.
- if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
- return allocateROMDATA(GV);
-
- // Else let the default implementation take care of it.
- return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang,TM);
-}
-
-
-
-
-/// getExplicitSectionGlobal - Allow the target to completely override
-/// section assignment of a global.
-const MCSection *PIC16TargetObjectFile::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const {
- assert(GV->hasSection());
-
- if (const GlobalVariable *GVar = cast<GlobalVariable>(GV)) {
- std::string SectName = GVar->getSection();
- // If address for a variable is specified, get the address and create
- // section.
-    // FIXME: move this attribute checking into PAN.
- std::string AddrStr = "Address=";
- if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) {
- std::string SectAddr = SectName.substr(AddrStr.length());
- if (SectAddr.compare("NEAR") == 0)
- return allocateSHARED(GVar, Mang);
- else
- return allocateAtGivenAddress(GVar, SectAddr);
- }
-
- // Create the section specified with section attribute.
- return allocateInGivenSection(GVar);
- }
-
- return getPIC16DataSection(GV->getSection().c_str(), UDATA);
-}
-
-const MCSection *
-PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV,
- Mangler *Mang) const {
- // Make sure that this is an uninitialized global.
- assert(GV->hasInitializer() && "This global doesn't need space");
- if (!GV->getInitializer()->isNullValue()) {
- // FIXME: Generate a warning in this case that near qualifier will be
- // ignored.
- return SelectSectionForGlobal(GV, SectionKind::getDataRel(), Mang, *TM);
- }
- std::string Name = PAN::getSharedUDataSectionName();
-
- PIC16Section *SharedUDataSect = getPIC16DataSection(Name.c_str(), UDATA_SHR);
- // Insert the GV into shared section.
- SharedUDataSect->Items.push_back(GV);
- return SharedUDataSect;
-}
-
-
-// Interface used by AsmPrinter to get a code section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForCode(const std::string &FnName,
- bool isISR) const {
- const std::string &sec_name = PAN::getCodeSectionName(FnName);
- // If it is ISR, its code section starts at a specific address.
- if (isISR)
- return getPIC16Section(sec_name, CODE, PAN::getISRAddr());
- return getPIC16Section(sec_name, CODE);
-}
-
-// Interface used by AsmPrinter to get a frame section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForFrame(const std::string &FnName) const {
- const std::string &sec_name = PAN::getFrameSectionName(FnName);
- return getPIC16Section(sec_name, UDATA_OVR);
-}
-
-// Allocate a global var in existing or new section of given name.
-const MCSection *
-PIC16TargetObjectFile::allocateInGivenSection(const GlobalVariable *GV) const {
- // Determine the type of section that we need to create.
- PIC16SectionType SecTy;
-
- // See if this is an uninitialized global.
- const Constant *C = GV->getInitializer();
- if (C->isNullValue())
- SecTy = UDATA;
-  // If this is initialized data in RAM, put it in the correct IDATA section.
- else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
- SecTy = IDATA;
- // This is initialized data in rom, put it in the readonly section.
- else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
- SecTy = ROMDATA;
- else
- llvm_unreachable ("Could not determine section type for global");
-
- PIC16Section *S = getPIC16UserSection(GV->getSection().c_str(), SecTy);
- S->Items.push_back(GV);
- return S;
-}
-
-// Allocate a global var in a new absolute section at the given address.
-const MCSection *
-PIC16TargetObjectFile::allocateAtGivenAddress(const GlobalVariable *GV,
- const std::string &Addr) const {
- // Determine the type of section that we need to create.
- PIC16SectionType SecTy;
-
- // See if this is an uninitialized global.
- const Constant *C = GV->getInitializer();
- if (C->isNullValue())
- SecTy = UDATA;
-  // If this is initialized data in RAM, put it in the correct IDATA section.
- else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
- SecTy = IDATA;
- // This is initialized data in rom, put it in the readonly section.
- else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
- SecTy = ROMDATA;
- else
- llvm_unreachable ("Could not determine section type for global");
-
- std::string Prefix = GV->getNameStr() + "." + Addr + ".";
- std::string SName = PAN::getUserSectionName(Prefix);
- PIC16Section *S = getPIC16UserSection(SName.c_str(), SecTy, Addr.c_str());
- S->Items.push_back(GV);
- return S;
-}
-
-
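allocateUDATA and allocateIDATA above use a first-fit walk over the existing data sections, opening a new section whenever a global does not fit within the 80-byte DataBankSize. A standalone C++ sketch of that first-fit policy, with hypothetical names and toy types in place of the LLVM ones:

#include <cstdio>
#include <string>
#include <vector>

enum { DataBankSize = 80 };   // one RAM bank holds at most 80 bytes of globals

struct Bank {
  std::string Name;
  unsigned Size;                      // bytes already allocated
  std::vector<std::string> Items;     // globals placed in this bank
};

// Place a global of ValSize bytes into the first bank with room, creating a
// new bank when none fits (oversized globals simply get a bank of their own).
static Bank &allocate(std::vector<Bank> &Banks, const std::string &GV,
                      unsigned ValSize) {
  for (size_t i = 0; i < Banks.size(); ++i)
    if (Banks[i].Size + ValSize <= DataBankSize) {
      Banks[i].Items.push_back(GV);
      Banks[i].Size += ValSize;
      return Banks[i];
    }
  Bank B;
  B.Name = "udata." + std::to_string(Banks.size()) + ".#";
  B.Size = ValSize;
  B.Items.push_back(GV);
  Banks.push_back(B);
  return Banks.back();
}

int main() {
  std::vector<Bank> Banks;
  allocate(Banks, "buf1", 60);
  allocate(Banks, "buf2", 30);   // does not fit next to buf1, opens a new bank
  for (size_t i = 0; i < Banks.size(); ++i)
    std::printf("%s: %u bytes, %zu globals\n", Banks[i].Name.c_str(),
                Banks[i].Size, Banks[i].Items.size());
  return 0;
}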
diff --git a/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.h b/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.h
deleted file mode 100644
index b1eb9f9..0000000
--- a/contrib/llvm/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- PIC16TargetObjectFile.h - PIC16 Object Info -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/StringMap.h"
-#include <vector>
-#include <string>
-
-namespace llvm {
- class GlobalVariable;
- class Module;
- class PIC16TargetMachine;
- class PIC16Section;
-
- enum { DataBankSize = 80 };
-
-  /// PIC16 splits the global data into multiple udata and idata sections.
-  /// Each udata and idata section needs to keep a list of the globals it
-  /// contains, in order to avoid scanning over all the global values again
-  /// just to print those that match the current section.
-  /// Keeping values inside the sections makes printing a section much easier.
- ///
- /// FIXME: MOVE ALL THIS STUFF TO PIC16Section.
- ///
-
- /// PIC16TargetObjectFile - PIC16 Object file. Contains data and code
- /// sections.
- // PIC16 Object File has two types of sections.
- // 1. Standard Sections
- // 1.1 un-initialized global data
- // 1.2 initialized global data
- // 1.3 program memory data
- // 1.4 local variables of functions.
- // 2. User defined sections
- // 2.1 Objects placed in a specific section. (By _Section() macro)
- // 2.2 Objects placed at a specific address. (By _Address() macro)
- class PIC16TargetObjectFile : public TargetLoweringObjectFile {
- /// SectionsByName - Bindings of names to allocated sections.
- mutable StringMap<PIC16Section*> SectionsByName;
-
- const TargetMachine *TM;
-
- /// Lists of sections.
- /// Standard Data Sections.
- mutable std::vector<PIC16Section *> UDATASections_;
- mutable std::vector<PIC16Section *> IDATASections_;
- mutable PIC16Section * ROMDATASection_;
- mutable PIC16Section * SHAREDUDATASection_;
-
- /// Standard Auto Sections.
- mutable std::vector<PIC16Section *> AUTOSections_;
-
- /// User specified sections.
- mutable std::vector<PIC16Section *> USERSections_;
-
-
- /// Find or Create a PIC16 Section, without adding it to any
- /// section list.
- PIC16Section *getPIC16Section(const std::string &Name,
- PIC16SectionType Ty,
- const std::string &Address = "",
- int Color = -1) const;
-
- /// Convenience functions. These wrappers also take care of adding
- /// the newly created section to the appropriate sections list.
-
- /// Find or Create PIC16 Standard Data Section.
- PIC16Section *getPIC16DataSection(const std::string &Name,
- PIC16SectionType Ty,
- const std::string &Address = "",
- int Color = -1) const;
-
- /// Find or Create PIC16 Standard Auto Section.
- PIC16Section *getPIC16AutoSection(const std::string &Name,
- PIC16SectionType Ty = UDATA_OVR,
- const std::string &Address = "",
- int Color = -1) const;
-
-    /// Find or Create PIC16 User Section.
- PIC16Section *getPIC16UserSection(const std::string &Name,
- PIC16SectionType Ty,
- const std::string &Address = "",
- int Color = -1) const;
-
- /// Allocate Un-initialized data to a standard UDATA section.
- const MCSection *allocateUDATA(const GlobalVariable *GV) const;
-
- /// Allocate Initialized data to a standard IDATA section.
- const MCSection *allocateIDATA(const GlobalVariable *GV) const;
-
- /// Allocate ROM data to the standard ROMDATA section.
- const MCSection *allocateROMDATA(const GlobalVariable *GV) const;
-
- /// Allocate an AUTO variable to an AUTO section.
- const MCSection *allocateAUTO(const GlobalVariable *GV) const;
-
- /// Allocate DATA in user specified section.
- const MCSection *allocateInGivenSection(const GlobalVariable *GV) const;
-
- /// Allocate DATA at user specified address.
- const MCSection *allocateAtGivenAddress(const GlobalVariable *GV,
- const std::string &Addr) const;
-
- /// Allocate a shared variable to SHARED section.
- const MCSection *allocateSHARED(const GlobalVariable *GV,
- Mangler *Mang) const;
-
- public:
- PIC16TargetObjectFile();
- ~PIC16TargetObjectFile();
- void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
- /// Return the section with the given Name. Null if not found.
- PIC16Section *findPIC16Section(const std::string &Name) const;
-
- /// Override section allocations for user specified sections.
- virtual const MCSection *
- getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler *Mang, const TargetMachine &TM) const;
-
- /// Select sections for Data and Auto variables(globals).
- virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind,
- Mangler *Mang,
- const TargetMachine&) const;
-
-
- /// Return a code section for a function.
- const PIC16Section *SectionForCode (const std::string &FnName,
- bool isISR) const;
-
- /// Return a frame section for a function.
- const PIC16Section *SectionForFrame (const std::string &FnName) const;
-
- /// Accessors for various section lists.
- const std::vector<PIC16Section *> &UDATASections() const {
- return UDATASections_;
- }
- const std::vector<PIC16Section *> &IDATASections() const {
- return IDATASections_;
- }
- const PIC16Section *ROMDATASection() const {
- return ROMDATASection_;
- }
- const PIC16Section *SHAREDUDATASection() const {
- return SHAREDUDATASection_;
- }
- const std::vector<PIC16Section *> &AUTOSections() const {
- return AUTOSections_;
- }
- const std::vector<PIC16Section *> &USERSections() const {
- return USERSections_;
- }
- };
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/contrib/llvm/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
deleted file mode 100644
index f1bdb12..0000000
--- a/contrib/llvm/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- PIC16TargetInfo.cpp - PIC16 Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::ThePIC16Target, llvm::TheCooperTarget;
-
-extern "C" void LLVMInitializePIC16TargetInfo() {
- RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16",
- "PIC16 14-bit [experimental]");
-
- RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]");
-}
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
new file mode 100644
index 0000000..331266d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(LLVM_TARGET_DEFINITIONS PTX.td)
+
+tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
+tablegen(PTXGenInstrNames.inc -gen-instr-enums)
+tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
+tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PTXGenRegisterNames.inc -gen-register-enums)
+tablegen(PTXGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PTXCodeGen
+ PTXAsmPrinter.cpp
+ PTXISelDAGToDAG.cpp
+ PTXISelLowering.cpp
+ PTXInstrInfo.cpp
+ PTXFrameLowering.cpp
+ PTXMCAsmInfo.cpp
+ PTXMCAsmStreamer.cpp
+ PTXMFInfoExtract.cpp
+ PTXRegisterInfo.cpp
+ PTXSubtarget.cpp
+ PTXTargetMachine.cpp
+ )
+
+add_subdirectory(TargetInfo)
diff --git a/contrib/llvm/lib/Target/PTX/Makefile b/contrib/llvm/lib/Target/PTX/Makefile
new file mode 100644
index 0000000..2c40d69
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/Makefile
@@ -0,0 +1,26 @@
+##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMPTXCodeGen
+TARGET = PTX
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PTXGenAsmWriter.inc \
+ PTXGenDAGISel.inc \
+ PTXGenInstrInfo.inc \
+ PTXGenInstrNames.inc \
+ PTXGenRegisterInfo.inc \
+ PTXGenRegisterInfo.h.inc \
+ PTXGenRegisterNames.inc \
+ PTXGenSubtarget.inc
+
+DIRS = TargetInfo
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 0000000..19385ba
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,49 @@
+//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PTXTargetMachine;
+ class FunctionPass;
+
+ namespace PTX {
+ enum StateSpace {
+ GLOBAL = 0, // default to global state space
+ CONSTANT = 1,
+ LOCAL = 2,
+ PARAMETER = 3,
+ SHARED = 4
+ };
+ } // namespace PTX
+
+ FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ extern Target ThePTXTarget;
+} // namespace llvm;
+
+// Defines symbolic names for PTX registers.
+#include "PTXGenRegisterNames.inc"
+
+// Defines symbolic names for the PTX instructions.
+#include "PTXGenInstrNames.inc"
+
+#endif // PTX_H
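
As a rough illustration of how the StateSpace values declared in PTX.h end up as the state-space keywords in the emitted assembly (see getStateSpaceName() in PTXAsmPrinter.cpp further down), here is a minimal standalone C++ sketch; it is not part of the patch and the helper name is made up:

    #include <cstdio>

    // Illustrative copy of the state-space numbering declared in PTX.h.
    enum StateSpace { GLOBAL = 0, CONSTANT = 1, LOCAL = 2, PARAMETER = 3, SHARED = 4 };

    // Hypothetical helper mirroring getStateSpaceName() in PTXAsmPrinter.cpp:
    // translate a state-space value into the keyword used in PTX directives.
    static const char *stateSpaceKeyword(unsigned AddrSpace) {
      switch (AddrSpace) {
      case GLOBAL:    return "global";
      case CONSTANT:  return "const";
      case LOCAL:     return "local";
      case PARAMETER: return "param";
      case SHARED:    return "shared";
      default:        return "unknown";
      }
    }

    int main() {
      for (unsigned AS = 0; AS != 5; ++AS)
        std::printf("address space %u -> .%s\n", AS, stateSpaceKeyword(AS));
      return 0;
    }
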
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 0000000..8b1a1b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,54 @@
+//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+
+def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
+ "Enable sm_20 target architecture">;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+ let InstructionSet = PTXInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 0000000..a605997
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,347 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>
+OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
+
+static cl::opt<std::string>
+OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
+ cl::init("sm_10"));
+
+namespace {
+class PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+ void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
+
+  // Autogenerated by tblgen (see PTXGenAsmWriter.inc, included below).
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+}; // class PTXAsmPrinter
+} // namespace
+
+static const char PARAM_PREFIX[] = "__param_";
+
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+ TEST_REGCLS(RRegs32, s32);
+ TEST_REGCLS(Preds, pred);
+#undef TEST_REGCLS
+
+ llvm_unreachable("Not in any register class!");
+ return NULL;
+}
+
+static const char *getInstructionTypeName(const MachineInstr *MI) {
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.getType() == MachineOperand::MO_Register)
+ return getRegisterTypeName(MO.getReg());
+ }
+
+ llvm_unreachable("No reg operand found in instruction!");
+ return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+ switch (addressSpace) {
+ default: llvm_unreachable("Unknown state space");
+ case PTX::GLOBAL: return "global";
+ case PTX::CONSTANT: return "const";
+ case PTX::LOCAL: return "local";
+ case PTX::PARAMETER: return "param";
+ case PTX::SHARED: return "shared";
+ }
+ return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at the beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+  // first, back up the GlobalVariables in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+ OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
+ OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+ OutStreamer.AddBlankLine();
+
+ // declare global variables
+ for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+ i != e; ++i)
+ EmitVariableDeclaration(i);
+}
+
+bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ EmitFunctionDeclaration();
+ EmitFunctionBody();
+ return false;
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText(Twine("{"));
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ // Print local variable definition
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
+ unsigned reg = *i;
+
+ std::string def = "\t.reg .";
+ def += getRegisterTypeName(reg);
+ def += ' ';
+ def += getRegisterName(reg);
+ def += ';';
+ OutStreamer.EmitRawText(Twine(def));
+ }
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ std::string str;
+ str.reserve(64);
+
+ // Write instruction to str
+ raw_string_ostream OS(str);
+ printInstruction(MI, OS);
+ OS << ';';
+ OS.flush();
+
+ // Replace "%type" if found
+ size_t pos;
+ if ((pos = str.find("%type")) != std::string::npos)
+ str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI));
+
+ StringRef strref = StringRef(str);
+ OutStreamer.EmitRawText(strref);
+}
+
+void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << *Mang->getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << (int) MO.getImm();
+ break;
+ case MachineOperand::MO_Register:
+ OS << getRegisterName(MO.getReg());
+ break;
+ }
+}
+
+void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ printOperand(MI, opNum, OS);
+
+ if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ OS << "+";
+ printOperand(MI, opNum+1, OS);
+}
+
+void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(gv))
+ return;
+
+ MCSymbol *gvsym = Mang->getSymbol(gv);
+
+ assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+ std::string decl;
+
+ // check if it is defined in some other translation unit
+ if (gv->isDeclaration())
+ decl += ".extern ";
+
+ // state space: e.g., .global
+ decl += ".";
+ decl += getStateSpaceName(gv->getType()->getAddressSpace());
+ decl += " ";
+
+ // alignment (optional)
+ unsigned alignment = gv->getAlignment();
+ if (alignment != 0) {
+ decl += ".align ";
+ decl += utostr(Log2_32(gv->getAlignment()));
+ decl += " ";
+ }
+
+ // TODO: add types
+ decl += ".s32 ";
+
+ decl += gvsym->getName();
+
+ if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType()))
+ decl += "[]";
+
+ decl += ";";
+
+ OutStreamer.EmitRawText(Twine(decl));
+
+ OutStreamer.AddBlankLine();
+}
+
+void PTXAsmPrinter::EmitFunctionDeclaration() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (!CurrentFnSym->isUndefined()) {
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+ return;
+ }
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const bool isKernel = MFI->isKernel();
+ unsigned reg;
+
+ std::string decl = isKernel ? ".entry" : ".func";
+
+ // Print return register
+ reg = MFI->retReg();
+ if (!isKernel && reg != PTX::NoRegister) {
+ decl += " (.reg ."; // FIXME: could it return in .param space?
+ decl += getRegisterTypeName(reg);
+ decl += " ";
+ decl += getRegisterName(reg);
+ decl += ")";
+ }
+
+ // Print function name
+ decl += " ";
+ decl += CurrentFnSym->getName().str();
+
+ // Print parameter list
+ if (!MFI->argRegEmpty()) {
+ decl += " (";
+ if (isKernel) {
+ for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
+ if (i != 0)
+ decl += ", ";
+ decl += ".param .s32 "; // TODO: add types
+ decl += PARAM_PREFIX;
+ decl += utostr(i + 1);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+ reg = *i;
+ assert(reg != PTX::NoRegister && "Not a valid register!");
+ if (i != b)
+ decl += ", ";
+ decl += ".reg .";
+ decl += getRegisterTypeName(reg);
+ decl += " ";
+ decl += getRegisterName(reg);
+ }
+ }
+ decl += ")";
+ }
+
+ OutStreamer.EmitRawText(Twine(decl));
+}
+
+#include "PTXGenAsmWriter.inc"
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget);
+}
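
EmitInstruction() above prints each instruction into a temporary string and then replaces the %type placeholder that the .td patterns leave in the mnemonic with the register type of the first register operand (via getInstructionTypeName()). A simplified standalone sketch of that substitution, not part of the patch and using only the standard library with made-up names:

    #include <iostream>
    #include <string>

    // Stand-in for the "%type" substitution in PTXAsmPrinter::EmitInstruction():
    // mnemonics such as "add.%type" get the operand type (e.g. "s32") patched in.
    static std::string expandTypePlaceholder(std::string Asm, const std::string &TypeName) {
      const std::string Placeholder = "%type";
      std::string::size_type Pos = Asm.find(Placeholder);
      if (Pos != std::string::npos)
        Asm.replace(Pos, Placeholder.size(), TypeName);
      return Asm;
    }

    int main() {
      // "s32" corresponds to the RRegs32 register class in getRegisterTypeName().
      std::cout << expandTypePlaceholder("add.%type\tr0, r1, r2;", "s32") << '\n';
      return 0;
    }
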
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 0000000..b621b9d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 0000000..574ae7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,43 @@
+//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+ const PTXSubtarget &STI;
+
+public:
+ explicit PTXFrameLowering(const PTXSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 0000000..efb0e8b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,151 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+ public:
+ PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+ virtual const char *getPassName() const {
+ return "PTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *Select(SDNode *Node);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+  // Include the pieces auto-generated from the target description
+#include "PTXGenDAGISel.inc"
+
+ private:
+ SDNode *SelectREAD_PARAM(SDNode *Node);
+
+ bool isImm(const SDValue &operand);
+ bool SelectImm(const SDValue &operand, SDValue &imm);
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+ if (Node->getOpcode() == PTXISD::READ_PARAM)
+ return SelectREAD_PARAM(Node);
+ else
+ return SelectCode(Node);
+}
+
+SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
+ SDValue index = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ if (index.getOpcode() != ISD::TargetConstant)
+ llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
+
+ return PTXInstrInfo::
+ GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+ isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+ return false;
+
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() != ISD::ADD) {
+ // let SelectADDRii handle the [imm] case
+ if (isImm(Addr))
+ return false;
+ // it is [reg]
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getNumOperands() < 2)
+ return false;
+
+ // let SelectADDRii handle the [imm+imm] case
+ if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+ return false;
+
+ // try [reg+imm] and [imm+reg]
+ for (int i = 0; i < 2; i ++)
+ if (SelectImm(Addr.getOperand(1-i), Offset)) {
+ Base = Addr.getOperand(i);
+ return true;
+ }
+
+ // neither [reg+imm] nor [imm+reg]
+ return false;
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // is [imm+imm]?
+ if (Addr.getOpcode() == ISD::ADD) {
+ return SelectImm(Addr.getOperand(0), Base) &&
+ SelectImm(Addr.getOperand(1), Offset);
+ }
+
+ // is [imm]?
+ if (SelectImm(Addr, Base)) {
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+ return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+ SDNode *node = operand.getNode();
+ if (!ConstantSDNode::classof(node))
+ return false;
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(node);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32);
+ return true;
+}
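
Taken together, the three selectors above carve memory operands into the forms the patterns expect: [reg+reg] goes to ADDRrr, [reg], [reg+imm] and [imm+reg] go to ADDRri, and [imm] and [imm+imm] go to ADDRii. The following standalone sketch of that classification over a toy address node is illustrative only (the types and names are not the LLVM ones) and is not part of the patch:

    #include <iostream>
    #include <string>

    // Toy model of an address: either a single operand or a two-operand add,
    // standing in for the SDValue/ISD::ADD nodes matched by SelectADDRrr/ri/ii.
    struct ToyOperand { bool IsImm; int Value; };          // register number or immediate
    struct ToyAddr    { bool IsAdd; ToyOperand Ops[2]; };  // [op0] or [op0 + op1]

    // Classify the operand forms the same way the selectors split them up.
    static std::string classify(const ToyAddr &A) {
      if (!A.IsAdd)
        return A.Ops[0].IsImm ? "[imm]     -> ADDRii" : "[reg]     -> ADDRri";
      bool L = A.Ops[0].IsImm, R = A.Ops[1].IsImm;
      if (L && R) return "[imm+imm] -> ADDRii";
      if (L || R) return "[reg+imm] -> ADDRri";
      return "[reg+reg] -> ADDRrr";
    }

    int main() {
      ToyAddr A = {true, {{false, 1}, {true, 8}}};   // r1 + 8
      std::cout << classify(A) << '\n';              // prints "[reg+imm] -> ADDRri"
      return 0;
    }
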
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 0000000..e6d4490
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,210 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXISelLowering.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
+ addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+
+ // Customize translation of memory addresses
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unimplemented operand");
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown opcode");
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::EXIT:
+ return "PTXISD::EXIT";
+ case PTXISD::RET:
+ return "PTXISD::RET";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ return DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct argmap_entry {
+ MVT::SimpleValueType VT;
+ TargetRegisterClass *RC;
+ TargetRegisterClass::iterator loc;
+
+ argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC)
+ : VT(_VT), RC(_RC), loc(_RC->begin()) {}
+
+ void reset() { loc = RC->begin(); }
+ bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
+} argmap[] = {
+ argmap_entry(MVT::i1, PTX::PredsRegisterClass),
+ argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
+};
+} // end anonymous namespace
+
+SDValue PTXTargetLowering::
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ break;
+ case CallingConv::PTX_Kernel:
+ MFI->setKernel(true);
+ break;
+ case CallingConv::PTX_Device:
+ MFI->setKernel(false);
+ break;
+ }
+
+ // Make sure we don't add argument registers twice
+ if (MFI->isDoneAddArg())
+ llvm_unreachable("cannot add argument registers twice");
+
+ // Reset argmap before allocation
+ for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
+ i != e; ++ i)
+ i->reset();
+
+ for (int i = 0, e = Ins.size(); i != e; ++ i) {
+ MVT::SimpleValueType VT = Ins[i].VT.SimpleTy;
+
+ struct argmap_entry *entry = std::find(argmap,
+ argmap + array_lengthof(argmap), VT);
+ if (entry == argmap + array_lengthof(argmap))
+ llvm_unreachable("Type of argument is not supported");
+
+ if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass)
+ llvm_unreachable("cannot pass preds to kernel");
+
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+
+    unsigned preg = *++(entry->loc); // allocation starts from register 1
+ unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
+ RegInfo.addLiveIn(preg, vreg);
+
+ MFI->addArgReg(preg);
+
+ SDValue inval;
+ if (MFI->isKernel())
+ inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain,
+ DAG.getTargetConstant(i, MVT::i32));
+ else
+ inval = DAG.getCopyFromReg(Chain, dl, vreg, VT);
+ InVals.push_back(inval);
+ }
+
+ MFI->doneAddArg();
+
+ return Chain;
+}
+
+SDValue PTXTargetLowering::
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::PTX_Kernel:
+ assert(Outs.size() == 0 && "Kernel must return void.");
+ return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+ case CallingConv::PTX_Device:
+ assert(Outs.size() <= 1 && "Can at most return one value.");
+ break;
+ }
+
+ // PTX_Device
+
+ // return void
+ if (Outs.size() == 0)
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+
+ assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
+
+ SDValue Flag;
+ unsigned reg = PTX::R0;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MFI->setRetReg(reg);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
+
+ // Copy the result values into the output registers
+ Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
+
+  // Guarantee that all emitted copies are stuck together,
+  // so nothing can be scheduled in between them
+ Flag = Chain.getValue(1);
+
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+}
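
LowerFormalArguments() above walks the argmap table: each incoming value type selects a register class, and successive arguments of that type take successive registers from it, starting at register 1 (the *++(entry->loc) step). A simplified standalone sketch of that hand-out, with purely illustrative type and register names and not part of the patch:

    #include <cstdio>
    #include <string>

    // Simplified model of the argmap logic: each value type owns a bank of
    // registers and consecutive arguments of that type take consecutive
    // registers, starting from index 1 (index 0 is skipped, as in the patch).
    struct ArgBank {
      std::string TypeName;   // value type handled by this bank (e.g. "s32", "pred")
      std::string RegPrefix;  // illustrative register prefix (e.g. "r", "p")
      unsigned    Next;       // next register index to hand out
    };

    int main() {
      ArgBank Banks[] = { {"s32", "r", 1}, {"pred", "p", 1} };
      const char *ArgTypes[] = { "s32", "s32", "pred", "s32" };

      for (const char *Ty : ArgTypes)
        for (ArgBank &B : Banks)
          if (B.TypeName == Ty) {
            std::printf("argument of type %-4s -> %%%s%u\n", Ty, B.RegPrefix.c_str(), B.Next++);
            break;
          }
      return 0;
    }
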
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 0000000..b03a9f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,67 @@
+//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+class PTXSubtarget;
+class PTXTargetMachine;
+
+namespace PTXISD {
+ enum NodeType {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ READ_PARAM,
+ EXIT,
+ RET
+ };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+ public:
+ explicit PTXTargetLowering(TargetMachine &TM);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual unsigned getFunctionAlignment(const Function *F) const {
+ return 2; }
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 0000000..e4e0999
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,24 @@
+//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops Preds, i32imm),
+ (ops (i1 zero_reg), (i32 0))>;
+
+let Namespace = "PTX" in {
+ class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+ : Instruction {
+ dag OutOperandList = oops;
+ dag InOperandList = !con(iops, (ins pred:$_p));
+ let AsmString = asmstr; // Predicate printing is defined elsewhere.
+ let Pattern = pattern;
+ let isPredicable = 1;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 0000000..805759b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,87 @@
+//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#include "PTXGenInstrInfo.inc"
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+ : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)),
+ RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+ const TargetRegisterClass *cls;
+ const int opcode;
+} map[] = {
+ { &PTX::RRegs32RegClass, PTX::MOVrr },
+ { &PTX::PredsRegClass, PTX::MOVpp }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const {
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, I, DL,
+ get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ if (DstRC != SrcRC)
+ return false;
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (DstRC == map[i].cls) {
+ MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode),
+ DstReg).addReg(SrcReg);
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(0, false));
+ MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0));
+ }
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case PTX::MOVpp:
+ case PTX::MOVrr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "Invalid register-register move instruction");
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+}
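
copyRegToReg() above is table-driven: the static map[] pairs each register class with the move opcode to use, and the loop picks the entry matching the destination class, back-filling the default predicate operands if the freshly built instruction lacks them. A minimal standalone sketch of that table lookup, with toy enums in place of the LLVM classes and opcodes and not part of the patch:

    #include <cstdio>

    // Toy register classes and opcodes standing in for PTX::RRegs32RegClass /
    // PTX::PredsRegClass and PTX::MOVrr / PTX::MOVpp.
    enum ToyRegClass { RRegs32, Preds };
    enum ToyOpcode   { MOVrr, MOVpp, INVALID };

    // Mirror of the static map[] table: choose a move opcode by register class.
    static ToyOpcode moveOpcodeFor(ToyRegClass RC) {
      static const struct { ToyRegClass Cls; ToyOpcode Opc; } Map[] = {
        { RRegs32, MOVrr },
        { Preds,   MOVpp },
      };
      for (const auto &Entry : Map)
        if (Entry.Cls == RC)
          return Entry.Opc;
      return INVALID;   // no register-to-register move known for this class
    }

    int main() {
      std::printf("RRegs32 copy -> opcode %d (MOVrr)\n", (int)moveOpcodeFor(RRegs32));
      std::printf("Preds   copy -> opcode %d (MOVpp)\n", (int)moveOpcodeFor(Preds));
      return 0;
    }
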
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 0000000..e7f00f0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,75 @@
+//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class PTXInstrInfo : public TargetInstrInfoImpl {
+ private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+ public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1) {
+ SDValue pred_reg = DAG->getRegister(0, MVT::i1);
+ SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
+ SDValue ops[] = { Op1, pred_reg, pred_imm };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+ }
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1,
+ SDValue Op2) {
+ SDValue pred_reg = DAG->getRegister(0, MVT::i1);
+ SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
+ SDValue ops[] = { Op1, Op2, pred_reg, pred_imm };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+ }
+
+ }; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
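
Both GetPTXMachineNode() overloads above append the same default predicate pair, a null predicate register and a 0 immediate, after the explicit operands, matching the pred operand that InstPTX concatenates onto every instruction in PTXInstrFormats.td. A toy sketch of the resulting operand layout (std::string stands in for SDValue; not part of the patch):

    #include <iostream>
    #include <string>
    #include <vector>

    // Toy stand-in for SDValue: a printable operand description.
    using ToyOperand = std::string;

    // Mirror the layout built by GetPTXMachineNode(): explicit operands first,
    // then the default predicate register and predicate immediate.
    static std::vector<ToyOperand> withDefaultPredicate(std::vector<ToyOperand> Ops) {
      Ops.push_back("pred_reg(0)");   // null predicate register
      Ops.push_back("pred_imm(0)");   // "always execute" predicate flag
      return Ops;
    }

    int main() {
      for (const ToyOperand &Op : withDefaultPredicate({"r1", "r2"}))
        std::cout << Op << '\n';
      return 0;
    }
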
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 0000000..9a74778
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,257 @@
+//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::CONSTANT;
+ return false;
+}]>;
+
+def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::GLOBAL;
+ return false;
+}]>;
+
+def store_local
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::LOCAL;
+ return false;
+}]>;
+
+def store_parameter
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::PARAMETER;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTX::SHARED;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+
+// Address operands
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RRegs32, i32imm);
+}
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allows generic 3-register shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+ : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+ : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass INT3<string opcstr, SDNode opnode> {
+ def rr : InstPTX<(outs RRegs32:$d),
+ (ins RRegs32:$a, RRegs32:$b),
+ !strconcat(opcstr, ".%type\t$d, $a, $b"),
+ [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+ def ri : InstPTX<(outs RRegs32:$d),
+ (ins RRegs32:$a, i32imm:$b),
+ !strconcat(opcstr, ".%type\t$d, $a, $b"),
+ [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+}
+
+// no %type directive, non-commutative
+multiclass INT3ntnc<string opcstr, SDNode opnode> {
+ def rr : InstPTX<(outs RRegs32:$d),
+ (ins RRegs32:$a, RRegs32:$b),
+ !strconcat(opcstr, "\t$d, $a, $b"),
+ [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+ def ri : InstPTX<(outs RRegs32:$d),
+ (ins RRegs32:$a, i32imm:$b),
+ !strconcat(opcstr, "\t$d, $a, $b"),
+ [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+ def ir : InstPTX<(outs RRegs32:$d),
+ (ins i32imm:$a, RRegs32:$b),
+ !strconcat(opcstr, "\t$d, $a, $b"),
+ [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+}
+
+multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
+ def rr : InstPTX<(outs RC:$d),
+ (ins MEMri:$a),
+ !strconcat(opstr, ".%type\t$d, [$a]"),
+ [(set RC:$d, (pat_load ADDRrr:$a))]>;
+ def ri : InstPTX<(outs RC:$d),
+ (ins MEMri:$a),
+ !strconcat(opstr, ".%type\t$d, [$a]"),
+ [(set RC:$d, (pat_load ADDRri:$a))]>;
+ def ii : InstPTX<(outs RC:$d),
+ (ins MEMii:$a),
+ !strconcat(opstr, ".%type\t$d, [$a]"),
+ [(set RC:$d, (pat_load ADDRii:$a))]>;
+}
+
+multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+ def rr : InstPTX<(outs),
+ (ins RC:$d, MEMri:$a),
+ !strconcat(opstr, ".%type\t[$a], $d"),
+ [(pat_store RC:$d, ADDRrr:$a)]>;
+ def ri : InstPTX<(outs),
+ (ins RC:$d, MEMri:$a),
+ !strconcat(opstr, ".%type\t[$a], $d"),
+ [(pat_store RC:$d, ADDRri:$a)]>;
+ def ii : InstPTX<(outs),
+ (ins RC:$d, MEMii:$a),
+ !strconcat(opstr, ".%type\t[$a], $d"),
+ [(pat_store RC:$d, ADDRii:$a)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : INT3<"add", add>;
+defm SUB : INT3<"sub", sub>;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : INT3ntnc<"shl.b32", PTXshl>;
+defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s32", PTXsra>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+let neverHasSideEffects = 1 in {
+ // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
+ def MOVpp
+ : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
+ def MOVrr
+ : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVpi
+ : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set Preds:$d, imm:$a)]>;
+ def MOVri
+ : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
+ [(set RRegs32:$d, imm:$a)]>;
+}
+
+defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
+defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
+defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
+defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
+defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
+
+def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
+ "ld.param.%type\t$d, [$a]", []>;
+
+defm STg : PTX_ST<"st.global", RRegs32, store_global>;
+defm STl : PTX_ST<"st.local", RRegs32, store_local>;
+// Store to parameter state space requires PTX 2.0 or higher?
+// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
+defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
+
+///===- Control Flow Instructions -----------------------------------------===//
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.cpp
new file mode 100644
index 0000000..b670abd
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.cpp
@@ -0,0 +1,30 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+
+using namespace llvm;
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasDotTypeDotSizeDirective = false;
+
+ HasSingleParameterDotFile = false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.h
new file mode 100644
index 0000000..03f5d66
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmInfo.h
@@ -0,0 +1,28 @@
+//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ struct PTXMCAsmInfo : public MCAsmInfo {
+ explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 0000000..0886ba8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,542 @@
+//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+
+public:
+ PTXMCAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *printer,
+ MCCodeEmitter *emitter,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+
+ ~PTXMCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void InitSections() {}
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+ virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void Finish();
+
+ /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+ Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label) {
+ report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+  // If the data ends with 0 and the target supports .asciz, use it; otherwise
+  // use .ascii.
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
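+// For illustration only: if the MAI provides an .asciz directive, the bytes
+// "hi\0" are emitted roughly as  .asciz "hi" ; without one they are emitted
+// as  .ascii "hi\000"  (the trailing NUL is kept and escaped).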
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ bool isPCRel, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ assert(!isPCRel && "Cannot emit pc relative relocations!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getTargetAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
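+// For illustration only: when the target has no 64-bit data directive, an
+// absolute value such as 0x1122334455667788 is split into two 32-bit halves,
+// emitted low half first (0x55667788, then 0x11223344) on little-endian
+// targets and high half first otherwise.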
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value,
+ unsigned AddrSpace) {
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value,
+ unsigned AddrSpace) {
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
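+// For illustration only, assuming an MAI whose align directive takes a
+// power-of-two exponent: a 16-byte alignment request with ValueSize 1 prints
+// the align directive followed by 4 (Log2_32(16)), while a non-power-of-two
+// request such as 12 bytes prints  .balign 12, 0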
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+
+void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename){
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS);
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
+ return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, ShowInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 0000000..b37c740
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,96 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must run after register allocation!
+
+namespace llvm {
+  /// PTXMFInfoExtract - PTX-specific code to extract PTX machine function
+  /// information for PTXAsmPrinter.
+ ///
+ class PTXMFInfoExtract : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ public:
+ PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX Machine Function Info Extractor";
+ }
+ }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
+
+ unsigned retreg = MFI->retReg();
+
+ DEBUG(dbgs()
+ << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+ << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+ DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+ reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg))
+ dbgs() << "Used Reg: " << reg << "\n";);
+
+  // FIXME: This is a slow linear scan.
+ for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
+ if (MRI.isPhysRegUsed(reg) &&
+ reg != retreg &&
+ (MFI->isKernel() || !MFI->isArgReg(reg)))
+ MFI->addLocalVarReg(reg);
+
+  // Notify MachineFunctionInfo that we are done adding local var regs.
+ MFI->doneAddLocalVar();
+
+ DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd();
+ i != e; ++i)
+ dbgs() << "Arg Reg: " << *i << "\n";);
+
+ DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
+ i != e; ++i)
+ dbgs() << "Local Var Reg: " << *i << "\n";);
+
+ return false;
+}
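+// For illustration only: in a hypothetical device (non-kernel) function whose
+// argument registers are {r1, r2}, whose return register is r0, and where
+// r0-r4 are marked used, the loop above records r3 and r4 as local variable
+// registers; in a kernel, used argument registers are recorded as well.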
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 0000000..56d044b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,79 @@
+//===- PTXMachineFunctionInfo.h - PTX machine function info ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+/// PTXMachineFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PTX target-specific information for each MachineFunction.
+///
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ bool is_kernel;
+ std::vector<unsigned> reg_arg, reg_local_var;
+ unsigned reg_ret;
+ bool _isDoneAddArg;
+
+public:
+ PTXMachineFunctionInfo(MachineFunction &MF)
+ : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
+ reg_arg.reserve(8);
+ reg_local_var.reserve(32);
+ }
+
+ void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
+
+ void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
+ void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
+ void setRetReg(unsigned reg) { reg_ret = reg; }
+
+ void doneAddArg(void) {
+ std::sort(reg_arg.begin(), reg_arg.end());
+ _isDoneAddArg = true;
+ }
+ void doneAddLocalVar(void) {
+ std::sort(reg_local_var.begin(), reg_local_var.end());
+ }
+
+ bool isDoneAddArg(void) { return _isDoneAddArg; }
+
+ bool isKernel() const { return is_kernel; }
+
+ typedef std::vector<unsigned>::const_iterator reg_iterator;
+
+ bool argRegEmpty() const { return reg_arg.empty(); }
+ int getNumArg() const { return reg_arg.size(); }
+ reg_iterator argRegBegin() const { return reg_arg.begin(); }
+ reg_iterator argRegEnd() const { return reg_arg.end(); }
+
+ bool localVarRegEmpty() const { return reg_local_var.empty(); }
+ reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
+ reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
+
+ unsigned retReg() const { return reg_ret; }
+
+ bool isArgReg(unsigned reg) const {
+ return std::binary_search(reg_arg.begin(), reg_arg.end(), reg);
+ }
+
+ bool isLocalVarReg(unsigned reg) const {
+ return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg);
+ }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
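+// A hypothetical usage sketch (names are illustrative, not from this tree):
+//   PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+//   MFI->addArgReg(SomeArgReg);
+//   MFI->doneAddArg();          // sorts reg_arg so isArgReg() can binary_search
+//   bool IsArg = MFI->isArgReg(SomeArgReg);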
diff --git a/contrib/llvm/lib/Target/TargetFrameInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
index 873d60a..0f3e7bc 100644
--- a/contrib/llvm/lib/Target/TargetFrameInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===//
+//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// Implements the layout of a stack frame on the target machine.
+// This file contains the PTX implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetFrameInfo.h"
-#include <cstdlib>
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+
using namespace llvm;
-TargetFrameInfo::~TargetFrameInfo() {
-}
+#include "PTXGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 0000000..67e130f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,63 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#include "PTXGenRegisterInfo.h.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+ PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII) {}
+
+ virtual const unsigned
+ *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs; // save nothing
+ }
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved; // reserve no regs
+ }
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj,
+ RegScavenger *RS = NULL) const {
+ llvm_unreachable("PTX does not support general function call");
+ }
+
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+ llvm_unreachable("PTX does not have a frame register");
+ return 0;
+ }
+
+ virtual unsigned getRARegister() const {
+ llvm_unreachable("PTX does not have a return address register");
+ return 0;
+ }
+
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+ }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 0000000..22e2b34
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,102 @@
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def P0 : PTXReg<"p0">;
+def P1 : PTXReg<"p1">;
+def P2 : PTXReg<"p2">;
+def P3 : PTXReg<"p3">;
+def P4 : PTXReg<"p4">;
+def P5 : PTXReg<"p5">;
+def P6 : PTXReg<"p6">;
+def P7 : PTXReg<"p7">;
+def P8 : PTXReg<"p8">;
+def P9 : PTXReg<"p9">;
+def P10 : PTXReg<"p10">;
+def P11 : PTXReg<"p11">;
+def P12 : PTXReg<"p12">;
+def P13 : PTXReg<"p13">;
+def P14 : PTXReg<"p14">;
+def P15 : PTXReg<"p15">;
+def P16 : PTXReg<"p16">;
+def P17 : PTXReg<"p17">;
+def P18 : PTXReg<"p18">;
+def P19 : PTXReg<"p19">;
+def P20 : PTXReg<"p20">;
+def P21 : PTXReg<"p21">;
+def P22 : PTXReg<"p22">;
+def P23 : PTXReg<"p23">;
+def P24 : PTXReg<"p24">;
+def P25 : PTXReg<"p25">;
+def P26 : PTXReg<"p26">;
+def P27 : PTXReg<"p27">;
+def P28 : PTXReg<"p28">;
+def P29 : PTXReg<"p29">;
+def P30 : PTXReg<"p30">;
+def P31 : PTXReg<"p31">;
+
+def R0 : PTXReg<"r0">;
+def R1 : PTXReg<"r1">;
+def R2 : PTXReg<"r2">;
+def R3 : PTXReg<"r3">;
+def R4 : PTXReg<"r4">;
+def R5 : PTXReg<"r5">;
+def R6 : PTXReg<"r6">;
+def R7 : PTXReg<"r7">;
+def R8 : PTXReg<"r8">;
+def R9 : PTXReg<"r9">;
+def R10 : PTXReg<"r10">;
+def R11 : PTXReg<"r11">;
+def R12 : PTXReg<"r12">;
+def R13 : PTXReg<"r13">;
+def R14 : PTXReg<"r14">;
+def R15 : PTXReg<"r15">;
+def R16 : PTXReg<"r16">;
+def R17 : PTXReg<"r17">;
+def R18 : PTXReg<"r18">;
+def R19 : PTXReg<"r19">;
+def R20 : PTXReg<"r20">;
+def R21 : PTXReg<"r21">;
+def R22 : PTXReg<"r22">;
+def R23 : PTXReg<"r23">;
+def R24 : PTXReg<"r24">;
+def R25 : PTXReg<"r25">;
+def R26 : PTXReg<"r26">;
+def R27 : PTXReg<"r27">;
+def R28 : PTXReg<"r28">;
+def R29 : PTXReg<"r29">;
+def R30 : PTXReg<"r30">;
+def R31 : PTXReg<"r31">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+
+def Preds : RegisterClass<"PTX", [i1], 8,
+ [P0, P1, P2, P3, P4, P5, P6, P7,
+ P8, P9, P10, P11, P12, P13, P14, P15,
+ P16, P17, P18, P19, P20, P21, P22, P23,
+ P24, P25, P26, P27, P28, P29, P30, P31]>;
+
+def RRegs32 : RegisterClass<"PTX", [i32], 32,
+ [R0, R1, R2, R3, R4, R5, R6, R7,
+ R8, R9, R10, R11, R12, R13, R14, R15,
+ R16, R17, R18, R19, R20, R21, R22, R23,
+ R24, R25, R26, R27, R28, R29, R30, R31]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 0000000..00e2c88
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,23 @@
+//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
+ std::string TARGET = "sm_20";
+ // TODO: call ParseSubtargetFeatures(FS, TARGET);
+}
+
+#include "PTXGenSubtarget.inc"
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 0000000..7fd85f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,32 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+namespace llvm {
+ class PTXSubtarget : public TargetSubtarget {
+ private:
+ bool is_sm20;
+
+ public:
+ PTXSubtarget(const std::string &TT, const std::string &FS);
+
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+ }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 0000000..b263813
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,60 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXMCAsmInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE,
+ TargetAsmBackend *TAB,
+ bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+ RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
+ RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
+ TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+ const std::string &TT,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT),
+ DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"),
+ FrameLowering(Subtarget),
+ InstrInfo(*this),
+ TLInfo(*this),
+ Subtarget(TT, FS) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPTXISelDag(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must run after register allocation!
+ PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 0000000..728e36f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,60 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+ private:
+ const TargetData DataLayout;
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXTargetLowering TLInfo;
+ PTXSubtarget Subtarget;
+
+ public:
+ PTXTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo(); }
+
+ virtual const PTXTargetLowering *getTargetLowering() const {
+ return &TLInfo; }
+
+ virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+}; // class PTXTargetMachine
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..4b09cf5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPTXInfo
+ PTXTargetInfo.cpp
+ )
+
+add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
new file mode 100644
index 0000000..8619785
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 0000000..a577d77
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,21 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTXTarget;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+ // see llvm/ADT/Triple.h
+ RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..389ea77
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmPrinter
+ PPCInstPrinter.cpp
+ )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
new file mode 100644
index 0000000..f097e84
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include 'main' powerpc target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 0000000..c8db0c4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,292 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ // Check for slwi/srwi mnemonics.
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ return;
+ }
+ }
+
+ printInstruction(MI, O);
+}
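+// For illustration only, the substitutions above rewrite, e.g.:
+//   rlwinm r3, r4, 3, 0, 28   ->  slwi r3, r4, 3   (ME == 31-SH)
+//   rldicr r3, r4, 5, 58      ->  sldi r3, r4, 5   (63-SH == ME)
+//   or     r3, r4, r4         ->  mr   r3, r4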
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ default: assert(0 && "Invalid predicate");
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ char Value = MI->getOperand(OpNo).getImm();
+ Value = (Value << (32-5)) >> (32-5);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned char Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+  // Branches can take an immediate operand. This is used by the branch
+  // selection pass to print $+8, an eight-byte displacement from the PC.
+ O << "$+";
+ printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: assert(0 && "Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo, O);
+ else
+ printSymbolLo(MI, OpNo, O);
+ O << '(';
+
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
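+// For illustration only: stripRegisterPrefix("r3") yields "3", "f12" yields
+// "12" and "cr7" yields "7"; names without a recognized prefix (e.g. "lr")
+// are returned unchanged.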
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+    // The Linux and AIX assemblers do not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+ // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "lo16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode ha16() as an operand
+ // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
+ if (MI->getOperand(OpNo).isExpr() &&
+ isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+ O << "ha16(";
+ printOperand(MI, OpNo, O);
+ O << ')';
+ } else {
+ printOperand(MI, OpNo, O);
+ }
+}
+
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 0000000..ebc10da
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,69 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ // 0 -> AIX, 1 -> Darwin.
+ unsigned SyntaxVariant;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+
+ bool isDarwinSyntax() const {
+ return SyntaxVariant == 1;
+ }
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+
+
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // FIXME: Remove
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index 67e3a4a..7242f3a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -15,24 +15,70 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
+#include <string>
+
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
-#include "llvm/Target/TargetMachine.h"
-
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
class formatted_raw_ostream;
+ class JITCodeEmitter;
+ class Target;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+ class MCCodeEmitter;
+ class MCContext;
+ class TargetMachine;
+ class TargetAsmBackend;
+
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx);
+ TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
-FunctionPass *createPPCBranchSelectionPass();
-FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
- JITCodeEmitter &MCE);
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP);
+
+ extern Target ThePPC32Target;
+ extern Target ThePPC64Target;
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" symbol. This is used for calls
+ /// and jumps to external functions on Tiger and earlier.
+ MO_DARWIN_STUB = 1,
+
+ /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
+ MO_LO16 = 4, MO_HA16 = 8,
-extern Target ThePPC32Target;
-extern Target ThePPC64Target;
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 16,
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 32,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 64
+ };
+ } // end namespace PPCII
+
} // end namespace llvm;
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 27644b2..aabf494 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -99,8 +99,14 @@ def PPCInstrInfo : InstrInfo {
let isLittleEndianEncoding = 1;
}
+def PPCAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
+
+ let AssemblyWriters = [PPCAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
new file mode 100644
index 0000000..c4d4ac9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -0,0 +1,119 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "PPC.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+};
+
+class PPCAsmBackend : public TargetAsmBackend {
+const Target &TheTarget;
+public:
+ PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {}
+
+ unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_lo16", 16, 16, 0 },
+ { "fixup_ppc_ha16", 16, 16, 0 },
+ { "fixup_ppc_lo14", 16, 14, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ bool MayNeedRelaxation(const MCInst &Inst) const {
+ // FIXME.
+ return false;
+ }
+
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ }
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+    // FIXME: Zero fill for now. That's not right, but at least it will get the
+    // section size right.
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write8(0);
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const {
+ assert(0 && "UNIMP");
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ bool is64 = getPointerSize() == 8;
+ return createMachObjectWriter(new PPCMachObjectWriter(
+ /*Is64Bit=*/is64,
+ (is64 ? object::mach::CTM_PowerPC64 :
+ object::mach::CTM_PowerPC),
+ object::mach::CSPPC_ALL),
+ OS, /*IsLittleEndian=*/false);
+ }
+
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ return false;
+ }
+ };
+} // end anonymous namespace
+
+
+
+
+TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const std::string &TT) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return new DarwinPPCAsmBackend(T);
+ default:
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index c1a5663..8ed5d7f 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -35,6 +35,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -43,6 +44,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,6 +52,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/SmallString.h"
+#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;
namespace {
@@ -57,88 +60,20 @@ namespace {
protected:
DenseMap<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget &Subtarget;
- uint64_t LabelID;
+ uint64_t TOCLabelID;
public:
explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
- Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {}
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
virtual const char *getPassName() const {
return "PowerPC Assembly Printer";
}
- PPCTargetMachine &getTM() {
- return static_cast<PPCTargetMachine&>(TM);
- }
-
- unsigned enumRegToMachineReg(unsigned enumReg) {
- switch (enumReg) {
- default: llvm_unreachable("Unhandled register!");
- case PPC::CR0: return 0;
- case PPC::CR1: return 1;
- case PPC::CR2: return 2;
- case PPC::CR3: return 3;
- case PPC::CR4: return 4;
- case PPC::CR5: return 5;
- case PPC::CR6: return 6;
- case PPC::CR7: return 7;
- }
- llvm_unreachable(0);
- }
-
- /// printInstruction - This method is automatically generated by tablegen
- /// from the instruction set description. This method returns true if the
- /// machine instruction was sufficiently described to print it, otherwise it
- /// returns false.
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
virtual void EmitInstruction(const MachineInstr *MI);
- void printOp(const MachineOperand &MO, raw_ostream &O);
-
- /// stripRegisterPrefix - This method strips the character prefix from a
- /// register name so that only the number is left. Used by for linux asm.
- const char *stripRegisterPrefix(const char *RegName) {
- switch (RegName[0]) {
- case 'r':
- case 'f':
- case 'v': return RegName + 1;
- case 'c': if (RegName[1] == 'r') return RegName + 2;
- }
- return RegName;
- }
-
- /// printRegister - Print register according to target requirements.
- ///
- void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){
- unsigned RegNo = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
-
- // If we should use 0 for R0.
- if (R0AsZero && RegNo == PPC::R0) {
- O << "0";
- return;
- }
-
- const char *RegName = getRegisterName(RegNo);
- // Linux assembler (Others?) does not take register mnemonics.
- // FIXME - What about special registers used in mfspr/mtspr?
- if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
- O << RegName;
- }
-
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- if (MO.isReg()) {
- printRegister(MO, false, O);
- } else if (MO.isImm()) {
- O << MO.getImm();
- } else {
- printOp(MO, O);
- }
- }
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
@@ -147,192 +82,9 @@ namespace {
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
-
- void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- char value = MI->getOperand(OpNo).getImm();
- value = (value << (32-5)) >> (32-5);
- O << (int)value;
- }
- void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned char value = MI->getOperand(OpNo).getImm();
- assert(value <= 31 && "Invalid u5imm argument!");
- O << (unsigned int)value;
- }
- void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned char value = MI->getOperand(OpNo).getImm();
- assert(value <= 63 && "Invalid u6imm argument!");
- O << (unsigned int)value;
- }
- void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- O << (short)MI->getOperand(OpNo).getImm();
- }
- void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- O << (unsigned short)MI->getOperand(OpNo).getImm();
- }
- void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- O << (short)(MI->getOperand(OpNo).getImm()*4);
- } else {
- O << "lo16(";
- printOp(MI->getOperand(OpNo), O);
- if (TM.getRelocationModel() == Reloc::PIC_)
- O << "-\"L" << getFunctionNumber() << "$pb\")";
- else
- O << ')';
- }
- }
- void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- // Branches can take an immediate operand. This is used by the branch
- // selection pass to print $+8, an eight byte displacement from the PC.
- if (MI->getOperand(OpNo).isImm()) {
- O << "$+" << MI->getOperand(OpNo).getImm()*4;
- } else {
- printOp(MI->getOperand(OpNo), O);
- }
- }
- void printCallOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- if (TM.getRelocationModel() != Reloc::Static) {
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- if (GV->isDeclaration() || GV->isWeakForLinker()) {
- // Dynamically-resolved functions need a stub for the function.
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
- O << *Sym;
- return;
- }
- }
- if (MO.isSymbol()) {
- SmallString<128> TempNameStr;
- TempNameStr += StringRef(MO.getSymbolName());
- TempNameStr += StringRef("$stub");
-
- MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
- if (StubSym.getPointer() == 0)
- StubSym = MachineModuleInfoImpl::
- StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
- O << *Sym;
- return;
- }
- }
-
- printOp(MI->getOperand(OpNo), O);
- }
- void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- O << (int)MI->getOperand(OpNo).getImm()*4;
- }
- void printPICLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- O << "\"L" << getFunctionNumber() << "$pb\"\n";
- O << "\"L" << getFunctionNumber() << "$pb\":";
- }
- void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- if (Subtarget.isDarwin()) O << "ha16(";
- printOp(MI->getOperand(OpNo), O);
- if (TM.getRelocationModel() == Reloc::PIC_)
- O << "-\"L" << getFunctionNumber() << "$pb\"";
- if (Subtarget.isDarwin())
- O << ')';
- else
- O << "@ha";
- }
- }
- void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm()) {
- printS16ImmOperand(MI, OpNo, O);
- } else {
- if (Subtarget.isDarwin()) O << "lo16(";
- printOp(MI->getOperand(OpNo), O);
- if (TM.getRelocationModel() == Reloc::PIC_)
- O << "-\"L" << getFunctionNumber() << "$pb\"";
- if (Subtarget.isDarwin())
- O << ')';
- else
- O << "@l";
- }
- }
- void printcrbitm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- unsigned CCReg = MI->getOperand(OpNo).getReg();
- unsigned RegNo = enumRegToMachineReg(CCReg);
- O << (0x80 >> RegNo);
- }
- // The new addressing mode printers.
- void printMemRegImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- printSymbolLo(MI, OpNo, O);
- O << '(';
- if (MI->getOperand(OpNo+1).isReg() &&
- MI->getOperand(OpNo+1).getReg() == PPC::R0)
- O << "0";
- else
- printOperand(MI, OpNo+1, O);
- O << ')';
- }
- void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm())
- printS16X4ImmOperand(MI, OpNo, O);
- else
- printSymbolLo(MI, OpNo, O);
- O << '(';
- if (MI->getOperand(OpNo+1).isReg() &&
- MI->getOperand(OpNo+1).getReg() == PPC::R0)
- O << "0";
- else
- printOperand(MI, OpNo+1, O);
- O << ')';
- }
-
- void printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
- // When used as the base register, r0 reads constant zero rather than
- // the value contained in the register. For this reason, the darwin
- // assembler requires that we print r0 as 0 (no r) when used as the base.
- const MachineOperand &MO = MI->getOperand(OpNo);
- printRegister(MO, true, O);
- O << ", ";
- printOperand(MI, OpNo+1, O);
- }
-
- void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- assert(MO.isGlobal());
- MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
-
- // Map symbol -> label of TOC entry.
- MCSymbol *&TOCEntry = TOC[Sym];
- if (TOCEntry == 0)
- TOCEntry = OutContext.
- GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(LabelID++));
-
- O << *TOCEntry << "@toc";
- }
-
- void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier);
-
MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
-
MachineLocation Location;
- assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
@@ -376,13 +128,35 @@ namespace {
};
} // end of anonymous namespace
-// Include the auto-generated portion of the assembly writer
-#include "PPCGenAsmWriter.inc"
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
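A minimal sketch, not part of the upstream change, of how the helper above behaves, assuming the register-name spellings PPCInstPrinter::getRegisterName produces ("r3", "f31", "cr7", ...):

#include <cassert>
#include <cstring>

// Sketch only: expected behaviour of stripRegisterPrefix, assuming the static
// helper defined above is visible in this translation unit.
static void stripRegisterPrefixExample() {
  assert(std::strcmp(stripRegisterPrefix("r3"), "3") == 0);    // GPR: drop 'r'
  assert(std::strcmp(stripRegisterPrefix("f31"), "31") == 0);  // FPR: drop 'f'
  assert(std::strcmp(stripRegisterPrefix("cr7"), "7") == 0);   // CR field: drop "cr"
  assert(std::strcmp(stripRegisterPrefix("lr"), "lr") == 0);   // no known prefix: unchanged
}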
-void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ // Linux assembler (Others?) does not take register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
case MachineOperand::MO_Immediate:
- llvm_unreachable("printOp() does not handle immediate values");
+ O << MO.getImm();
+ return;
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
@@ -475,9 +249,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'c': // Don't print "$" before a global var name or constant.
- // PPC never has a prefix.
- printOperand(MI, OpNo, O);
- return false;
+ break; // PPC never has a prefix.
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
@@ -509,48 +281,28 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
- assert (MI->getOperand(OpNo).isReg());
+ assert(MI->getOperand(OpNo).isReg());
O << "0(";
printOperand(MI, OpNo, O);
O << ")";
return false;
}
-void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier){
- assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
- unsigned Code = MI->getOperand(OpNo).getImm();
- if (!strcmp(Modifier, "cc")) {
- switch ((PPC::Predicate)Code) {
- case PPC::PRED_ALWAYS: return; // Don't print anything for always.
- case PPC::PRED_LT: O << "lt"; return;
- case PPC::PRED_LE: O << "le"; return;
- case PPC::PRED_EQ: O << "eq"; return;
- case PPC::PRED_GE: O << "ge"; return;
- case PPC::PRED_GT: O << "gt"; return;
- case PPC::PRED_NE: O << "ne"; return;
- case PPC::PRED_UN: O << "un"; return;
- case PPC::PRED_NU: O << "nu"; return;
- }
-
- } else {
- assert(!strcmp(Modifier, "reg") &&
- "Need to specify 'cc' or 'reg' as predicate op modifier!");
- // Don't print the register for 'always'.
- if (Code == PPC::PRED_ALWAYS) return;
- printOperand(MI, OpNo+1, O);
- }
-}
-
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream O(Str);
-
- if (MI->getOpcode() == TargetOpcode::DBG_VALUE) {
+ MCInst TmpInst;
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE: {
+ if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+ SmallString<32> Str;
+ raw_svector_ostream O(Str);
unsigned NOps = MI->getNumOperands();
assert(NOps==4);
O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
@@ -567,56 +319,65 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(O.str());
return;
}
- // Check for slwi/srwi mnemonics.
- if (MI->getOpcode() == PPC::RLWINM) {
- unsigned char SH = MI->getOperand(2).getImm();
- unsigned char MB = MI->getOperand(3).getImm();
- unsigned char ME = MI->getOperand(4).getImm();
- bool useSubstituteMnemonic = false;
- if (SH <= 31 && MB == 0 && ME == (31-SH)) {
- O << "\tslwi "; useSubstituteMnemonic = true;
- }
- if (SH <= 31 && MB == (32-SH) && ME == 31) {
- O << "\tsrwi "; useSubstituteMnemonic = true;
- SH = 32-SH;
- }
- if (useSubstituteMnemonic) {
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 1, O);
- O << ", " << (unsigned int)SH;
- OutStreamer.EmitRawText(O.str());
- return;
- }
+
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+
+
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+ Create(PICBase, OutContext)));
+ OutStreamer.EmitInstruction(TmpInst);
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
}
-
- if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- O << "\tmr ";
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 1, O);
- OutStreamer.EmitRawText(O.str());
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isGlobal());
+
+ // Map symbol -> label of TOC entry.
+ MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+ if (TOCEntry == 0)
+ TOCEntry = GetTempSymbol("C", TOCLabelID++);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ OutStreamer.EmitInstruction(TmpInst);
return;
}
-
- if (MI->getOpcode() == PPC::RLDICR) {
- unsigned char SH = MI->getOperand(2).getImm();
- unsigned char ME = MI->getOperand(3).getImm();
- // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
- if (63-SH == ME) {
- O << "\tsldi ";
- printOperand(MI, 0, O);
- O << ", ";
- printOperand(MI, 1, O);
- O << ", " << (unsigned int)SH;
- OutStreamer.EmitRawText(O.str());
- return;
- }
+
+ case PPC::MFCRpseud:
+ // Transform: %R3 = MFCRpseud %CR7
+ // Into: %R3 = MFCR ;; cr7
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
}
- printInstruction(MI, O);
- OutStreamer.EmitRawText(O.str());
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
}
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
@@ -677,7 +438,10 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
- OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+ // FIXME: This is a total hack; finish MC'izing the PPC backend.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -915,8 +679,18 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
return new PPCLinuxAsmPrinter(tm, Streamer);
}
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+
// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
index df9ab52..42232a0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -50,13 +50,24 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
- unsigned getBinaryCodeForInstr(const MachineInstr &MI);
-
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
/// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-
unsigned getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO);
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -67,10 +78,6 @@ namespace {
/// emitBasicBlock - emits the given MachineBasicBlock to memory
///
void emitBasicBlock(MachineBasicBlock &MBB);
-
- /// getValueBit - return the particular bit of Val
- ///
- unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
};
}
@@ -128,125 +135,127 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
}
}
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) {
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
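A worked value may help here; this is a minimal sketch, not part of the upstream change, assuming PPCRegisterInfo::getRegisterNumbering maps CRn to n:

// Sketch only: the crbitm encoding is a one-hot 8-bit field mask with CR0 in
// the most significant bit, as MTCRF/MFOCRF expect:
//   CR0 -> 0x80, CR3 -> 0x10, CR7 -> 0x01.
static unsigned crFieldMask(unsigned CRFieldNo) { // CRFieldNo in 0..7
  return 0x80u >> CRFieldNo;
}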
- unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
- // or things that get fixed up later by the JIT.
- if (MO.isReg()) {
- rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
- // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the
- // register, not the register number directly.
- if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
- (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) {
- rv = 0x80 >> rv;
- }
- } else if (MO.isImm()) {
- rv = MO.getImm();
- } else if (MO.isGlobal() || MO.isSymbol() ||
- MO.isCPI() || MO.isJTI()) {
- unsigned Reloc = 0;
- if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin ||
- MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF ||
- MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
- Reloc = PPC::reloc_pcrel_bx;
- else {
- if (TM.getRelocationModel() == Reloc::PIC_) {
- assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
- }
- switch (MI.getOpcode()) {
- default: MI.dump(); llvm_unreachable("Unknown instruction for relocation!");
- case PPC::LIS:
- case PPC::LIS8:
- case PPC::ADDIS:
- case PPC::ADDIS8:
- Reloc = PPC::reloc_absolute_high; // Pointer to symbol
- break;
- case PPC::LI:
- case PPC::LI8:
- case PPC::LA:
- // Loads.
- case PPC::LBZ:
- case PPC::LBZ8:
- case PPC::LHA:
- case PPC::LHA8:
- case PPC::LHZ:
- case PPC::LHZ8:
- case PPC::LWZ:
- case PPC::LWZ8:
- case PPC::LFS:
- case PPC::LFD:
-
- // Stores.
- case PPC::STB:
- case PPC::STB8:
- case PPC::STH:
- case PPC::STH8:
- case PPC::STW:
- case PPC::STW8:
- case PPC::STFS:
- case PPC::STFD:
- Reloc = PPC::reloc_absolute_low;
- break;
-
- case PPC::LWA:
- case PPC::LD:
- case PPC::STD:
- case PPC::STD_32:
- Reloc = PPC::reloc_absolute_low_ix;
- break;
- }
- }
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
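The PIC bias in the comment above can be written out as plain arithmetic; this is a sketch, not part of the upstream change, and it assumes MovePCtoLROffset records the address of the 'bl' itself, so LR holds that address plus 4:

#include <cstdint>

// Sketch only: the relocated field is pre-biased so that adding the run-time
// LR value recovers the absolute address of the global.
static intptr_t picFieldAfterReloc(intptr_t GV, intptr_t MovePCtoLR) {
  return GV - MovePCtoLR - 4;       // &gv + Cst, with Cst = -&movepctolr - 4
}
static intptr_t picRuntimeAddress(intptr_t Field, intptr_t MovePCtoLR) {
  return Field + (MovePCtoLR + 4);  // LR == &movepctolr + 4, so this is &gv
}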
- MachineRelocation R;
- if (MO.isGlobal()) {
- R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(MO.getGlobal()), 0,
- isa<Function>(MO.getGlobal()));
- } else if (MO.isSymbol()) {
- R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
- Reloc, MO.getSymbolName(), 0);
- } else if (MO.isCPI()) {
- R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
- Reloc, MO.getIndex(), 0);
- } else {
- assert(MO.isJTI());
- R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
- Reloc, MO.getIndex(), 0);
- }
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
- // If in PIC mode, we need to encode the negated address of the
- // 'movepctolr' into the unrelocated field. After relocation, we'll have
- // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
- // field, we get &gv. This doesn't happen for branch relocations, which are
- // always implicitly pc relative.
- if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){
- assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
- R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
- }
- MCE.addRelocation(R);
-
- } else if (MO.isMBB()) {
- unsigned Reloc = 0;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::B || Opcode == PPC::BL_Darwin ||
- Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 ||
- Opcode == PPC::BLA_SVR4)
- Reloc = PPC::reloc_pcrel_bx;
- else // BCC instruction
- Reloc = PPC::reloc_pcrel_bcx;
-
- MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
- Reloc, MO.getMBB()));
- } else {
-#ifndef NDEBUG
- errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
-#endif
- llvm_unreachable(0);
- }
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
- return rv;
+unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return 0;
+}
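The reloc_absolute_high/low relocations requested above ultimately resolve to the usual ha16/lo16 split; a sketch of that arithmetic follows (not part of the upstream change):

#include <cstdint>

// Sketch only: lo16 is consumed as a signed 16-bit immediate, so ha16 carries
// a +1 adjustment whenever bit 15 of the address is set, ensuring
// (ha16(Addr) << 16) + (int16_t)lo16(Addr) == Addr.
static uint16_t lo16(uint32_t Addr) { return Addr & 0xFFFF; }
static uint16_t ha16(uint32_t Addr) {
  return (Addr >> 16) + ((Addr & 0x8000) ? 1 : 0);
}
// Example: Addr = 0x1234ABCD -> ha16 = 0x1235, lo16 = 0xABCD,
// and (0x1235 << 16) + (int16_t)0xABCD == 0x1234ABCD.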
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16 bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
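A concrete instance of the packing above, as a minimal sketch (not part of the upstream change); the operand values are illustrative:

// Sketch only: a D-form memory operand such as "-8(r1)" packs the base
// register number into bits 16..20 and the 16-bit displacement into bits
// 0..15, so (1 << 16) | (-8 & 0xFFFF) == 0x1FFF8.
static unsigned packMemRI(unsigned BaseRegNo, int Disp) {
  return (BaseRegNo << 16) | (static_cast<unsigned>(Disp) & 0xFFFF);
}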
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14 bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
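The memrix case follows the same pattern with a 14-bit field; in this sketch (not part of the upstream change) the displacement is assumed to arrive already divided by 4, as the FPOffset/4 and LROffset/4 operands later in this patch suggest:

// Sketch only: a DS-form memory operand packs the word-scaled displacement
// into bits 0..13 and the base register number into bits 14..18; e.g. base r1
// with scaled displacement -2 gives (1 << 14) | (-2 & 0x3FFF) == 0x7FFE.
static unsigned packMemRIX(unsigned BaseRegNo, int ScaledDisp) {
  return (BaseRegNo << 14) | (static_cast<unsigned>(ScaledDisp) & 0x3FFF);
}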
+
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
}
#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
new file mode 100644
index 0000000..b3c889e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFixupKinds.h
@@ -0,0 +1,45 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+ /// fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like
+ /// 'b' and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'li'.
+ fixup_ppc_lo16,
+
+ /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
+ /// like 'lis'.
+ fixup_ppc_ha16,
+
+ /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs
+ /// like 'std'.
+ fixup_ppc_lo14,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 0000000..6aca6b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,971 @@
+//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary bigger than
+// the default (16 bytes on Darwin) when there is a stack local of greater
+// alignment. This does not currently work, because the delta between old and
+// new stack pointers is added to offsets that reference incoming parameters
+// after the prolog is generated, and the code that does that doesn't handle a
+// variable delta. You don't want to do that anyway; a better approach is to
+// reserve another register that points to the incoming stack pointer, and
+// reference parameters relative to that.
+#define ALIGN_STACK 0
+
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().getDesc().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values must already be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
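The mask the code above assembles can be pinned down with one concrete case; a minimal sketch, not part of the upstream change:

// Sketch only: vector register Vn owns bit (31 - n) of VRSAVE, so V0 is the
// most significant bit. If only V0 and V17 are used the mask is
// (1u << 31) | (1u << 14) == 0x80004000; neither half is zero, so it takes an
// ORIS of 0x8000 followed by an ORI of 0x4000 to materialize.
static unsigned vrsaveBit(unsigned VRegNo) { // VRegNo in 0..31
  return 1u << (31 - VRegNo);
}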
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // If we are a leaf function, use no more than 224 bytes of stack space,
+ // and have no frame pointer, calls, or dynamic alloca, then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
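The rounding used above is the standard power-of-two mask trick; a worked value follows as a minimal sketch (not part of the upstream change):

// Sketch only: with TargetAlign = 16 the mask is 15, so a 212-byte total
// (say, 100 bytes of locals plus a 112-byte maximum call frame) rounds up to
// (212 + 15) & ~15u == 224.
static unsigned roundUpToAlignment(unsigned Size, unsigned Align) {
  unsigned AlignMask = Align - 1; // Align is assumed to be a power of two
  return (Size + AlignMask) & ~AlignMask;
}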
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+ // early, before the stack layout is calculated, and so it might return true
+ // or false here depending on when it is called.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return false;
+
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
+ (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->doesNotThrow() ||
+ UnwindTablesMandatory;
+
+ // Prepare for frame info.
+ MCSymbol *FrameLabel = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ // FIXME: determineFrameLayout() may change the frame size. This should be
+ // moved earlier, into some hook.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!isPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Show update of SP.
+ if (NegFrameSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ }
+ }
+
+ MCSymbol *ReadyLabel = 0;
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!isPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+
+ if (needsFrameMoves) {
+ ReadyLabel = MMI.getContext().CreateTempSymbol();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
+ (isPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF);
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (Subtarget.isSVR4ABI()) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be non-negative");
+ if (MaxTCRetDelta > 0)
+ FrameSize += (StackAdj + Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!isPPC64) {
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out what the fixed offset of the frame pointer save area is.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for the frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI())
+ return;
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasCRSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+// FIXME SVR4: Disable CR save area for now.
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
+// HasCRSaveArea = true;
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Likewise, move the 64-bit register save area spill slots down, taking
+ // into account the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR),
+ PPCRegisterInfo::getRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // The CR save area is below the general register save area.
+ if (HasCRSaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the CR/CRBIT register class?
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding; the VR save area needs 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+}
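The downward 16-byte alignment applied to the VR save area bound is easiest to check with a number; a minimal sketch, not part of the upstream change, assuming the usual two's-complement behaviour of & on negative offsets:

// Sketch only: clearing the low four bits of (LowerBound - 15) rounds the
// negative running offset down to a 16-byte boundary, e.g.
// LowerBound = -90: (-90 - 15) & ~15 == -105 & ~15 == -112.
static long long alignDown16(long long Offset) {
  return (Offset - 15) & ~15LL;
}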
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 7587b03..0c18de1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===//
+//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -15,20 +15,42 @@
#include "PPC.h"
#include "PPCSubtarget.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
namespace llvm {
+ class PPCSubtarget;
-class PPCFrameInfo: public TargetFrameInfo {
- const TargetMachine &TM;
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
public:
- PPCFrameInfo(const TargetMachine &tm, bool LP64)
- : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) {
+ PPCFrameLowering(const PPCSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
}
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool needsFP(const MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
@@ -48,17 +70,17 @@ public:
// around that does use it, and that needs to continue to work.
if (isDarwinABI)
return isPPC64 ? -8U : -4U;
-
+
// SVR4 ABI: First slot in the general register save area.
return isPPC64 ? -8U : -4U;
}
-
+
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
if (isDarwinABI || isPPC64)
return 6 * (isPPC64 ? 8 : 4);
-
+
// SVR4 ABI:
return 8;
}
@@ -74,7 +96,7 @@ public:
// least enough stack space for the caller to store the 8 GPRs.
if (isDarwinABI || isPPC64)
return 8 * (isPPC64 ? 8 : 4);
-
+
// 32-bit SVR4 ABI:
// There is no default stack allocated for the 8 first GPR arguments.
return 0;
@@ -91,9 +113,9 @@ public:
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const {
- if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) {
+ if (Subtarget.isDarwinABI()) {
NumEntries = 1;
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (Subtarget.isPPC64()) {
static const SpillSlot darwin64Offsets = {PPC::X31, -8};
return &darwin64Offsets;
} else {
@@ -103,7 +125,7 @@ public:
}
// Early exit if not using the SVR4 ABI.
- if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+ if (!Subtarget.isSVR4ABI()) {
NumEntries = 0;
return 0;
}
@@ -283,7 +305,7 @@ public:
{PPC::V20, -192}
};
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (Subtarget.isPPC64()) {
NumEntries = array_lengthof(Offsets64);
return Offsets64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index db11fde..0de5844 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
//
// This models the dispatch group formation of the PPC970 processor. Dispatch
// groups are bundles of up to five instructions that can contain various mixes
-// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
// branch instruction per-cycle.
//
// There are a number of restrictions to dispatch group formation: some
@@ -55,14 +55,14 @@ PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
void PPCHazardRecognizer970::EndDispatchGroup() {
DEBUG(errs() << "=== Start of dispatch group\n");
NumIssued = 0;
-
+
// Structural hazard info.
HasCTRSet = false;
NumStores = 0;
}
-PPCII::PPC970_Unit
+PPCII::PPC970_Unit
PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
@@ -72,14 +72,14 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
return PPCII::PPC970_Pseudo;
}
Opcode = ~Opcode;
-
+
const TargetInstrDesc &TID = TII.get(Opcode);
-
+
isLoad = TID.mayLoad();
isStore = TID.mayStore();
-
+
uint64_t TSFlags = TID.TSFlags;
-
+
isFirst = TSFlags & PPCII::PPC970_First;
isSingle = TSFlags & PPCII::PPC970_Single;
isCracked = TSFlags & PPCII::PPC970_Cracked;
@@ -96,7 +96,7 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
return true;
if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
return true;
-
+
// Okay, we don't have an exact match, if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
if (StorePtr2[i] == Ptr2) {
@@ -122,26 +122,28 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
/// instructions that wouldn't terminate the dispatch group that would cause a
/// pipeline flush.
ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
-getHazardType(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
- PPCII::PPC970_Unit InstrType =
+ PPCII::PPC970_Unit InstrType =
GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
isLoad, isStore);
- if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
if (NumIssued != 0 && (isFirst || isSingle))
return Hazard;
-
+
// If this instruction is cracked into two ops by the decoder, we know that
// it is not a branch and that it cannot issue if 3 other instructions are
// already in the dispatch group.
if (isCracked && NumIssued > 2)
return Hazard;
-
+
switch (InstrType) {
default: llvm_unreachable("Unknown instruction type!");
case PPCII::PPC970_FXU:
@@ -159,11 +161,11 @@ getHazardType(SUnit *SU) {
case PPCII::PPC970_BRU:
break;
}
-
+
// Do not allow MTCTR and BCTRL to be in the same dispatch group.
if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
return NoopHazard;
-
+
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
if (isLoad && NumStores) {
@@ -212,27 +214,27 @@ getHazardType(SUnit *SU) {
LoadSize = 16;
break;
}
-
- if (isLoadOfStoredAddress(LoadSize,
+
+ if (isLoadOfStoredAddress(LoadSize,
Node->getOperand(0), Node->getOperand(1)))
return NoopHazard;
}
-
+
return NoHazard;
}
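
Once the exact-pointer match in isLoadOfStoredAddress fails, the indexed-offset case above reduces to an interval-overlap test on (offset, size) pairs off a common base register. This standalone sketch is illustrative only; it is not the SelectionDAG-level code and the names are invented:

    // Illustrative overlap test; not the DAG-level code above.
    #include <cassert>
    #include <cstdint>

    // True if [LoadOff, LoadOff+LoadSize) overlaps [StoreOff, StoreOff+StoreSize)
    // for two accesses relative to the same base register.
    static bool mayOverlap(int64_t LoadOff, uint64_t LoadSize,
                           int64_t StoreOff, uint64_t StoreSize) {
      return LoadOff < StoreOff + (int64_t)StoreSize &&
             StoreOff < LoadOff + (int64_t)LoadSize;
    }

    int main() {
      assert(mayOverlap(0, 4, 0, 8));    // load of the stored doubleword's low half
      assert(!mayOverlap(8, 4, 0, 8));   // load just past the store: no hazard
      assert(mayOverlap(4, 8, 0, 8));    // partial overlap (fp->int style traffic)
      return 0;
    }
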
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ const SDNode *Node = SU->getNode()->getGluedMachineNode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
- PPCII::PPC970_Unit InstrType =
+ PPCII::PPC970_Unit InstrType =
GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
isLoad, isStore);
- if (InstrType == PPCII::PPC970_Pseudo) return;
+ if (InstrType == PPCII::PPC970_Pseudo) return;
unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR) HasCTRSet = true;
-
+
// Track the address stored to.
if (isStore) {
unsigned ThisStoreSize;
@@ -278,22 +280,22 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
ThisStoreSize = 16;
break;
}
-
+
StoreSize[NumStores] = ThisStoreSize;
StorePtr1[NumStores] = Node->getOperand(1);
StorePtr2[NumStores] = Node->getOperand(2);
++NumStores;
}
-
+
if (InstrType == PPCII::PPC970_BRU || isSingle)
NumIssued = 4; // Terminate a d-group.
++NumIssued;
-
+
// If this instruction is cracked into two ops by the decoder, remember that
// we issued two pieces.
if (isCracked)
++NumIssued;
-
+
if (NumIssued == 5)
EndDispatchGroup();
}
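
The group-termination bookkeeping in EmitInstruction (branches and PPC970_Single instructions force the group to end, cracked instructions take two slots, a group holds five) boils down to a small counter. This is a standalone sketch of that accounting, not the recognizer itself, and it folds the end-of-group reset into the same function for brevity:

    // Standalone sketch of the dispatch-slot accounting; not the recognizer itself.
    #include <cstdio>

    struct DispatchCounter {
      unsigned NumIssued = 0;
      // Returns true when issuing this instruction closes the dispatch group.
      bool issue(bool isBranchOrSingle, bool isCracked) {
        if (isBranchOrSingle)
          NumIssued = 4;          // force this to be the final slot
        ++NumIssued;
        if (isCracked)
          ++NumIssued;            // a cracked instruction occupies two slots
        if (NumIssued >= 5) {     // group full: start a new one
          NumIssued = 0;
          return true;
        }
        return false;
      }
    };

    int main() {
      DispatchCounter G;
      bool a = G.issue(false, true);   // cracked op: two slots used, group open
      bool b = G.issue(false, false);  // plain op: three slots used
      bool c = G.issue(true, false);   // branch: closes the group
      std::printf("%d %d %d\n", a, b, c);  // prints: 0 0 1
      return 0;
    }
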
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
index 74bf8e5..2f81f0f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -19,7 +19,7 @@
#include "PPCInstrInfo.h"
namespace llvm {
-
+
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
@@ -28,14 +28,14 @@ namespace llvm {
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
const TargetInstrInfo &TII;
-
+
unsigned NumIssued; // Number of insts issued, including advanced cycles.
-
+
// Various things that can cause a structural hazard.
-
+
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
-
+
// StoredPtr - Keep track of the address of any store. If we see a load from
  // the same address (or one that aliases it), disallow the load. We can have
// up to four stores in one dispatch group, hence we track up to 4.
@@ -45,24 +45,24 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
SDValue StorePtr1[4], StorePtr2[4];
unsigned StoreSize[4];
unsigned NumStores;
-
+
public:
PPCHazardRecognizer970(const TargetInstrInfo &TII);
- virtual HazardType getHazardType(SUnit *SU);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
-
+
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
///
void EndDispatchGroup();
-
+
/// GetInstrType - Classify the specified powerpc opcode according to its
/// pipeline.
PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
-
+
bool isLoadOfStoredAddress(unsigned LoadSize,
SDValue Ptr1, SDValue Ptr2) const;
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 00eebb8..faae9b2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
#include "PPC.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -49,16 +48,16 @@ namespace {
: SelectionDAGISel(tm), TM(tm),
PPCLowering(*TM.getTargetLowering()),
PPCSubTarget(*TM.getSubtargetImpl()) {}
-
+
virtual bool runOnMachineFunction(MachineFunction &MF) {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
SelectionDAGISel::runOnMachineFunction(MF);
-
+
InsertVRSaveCode(MF);
return true;
}
-
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -70,13 +69,13 @@ namespace {
inline SDValue getI64Imm(uint64_t Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i64);
}
-
+
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
}
-
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
/// with any number of 0s on either side. The 1s are allowed to wrap from
/// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
/// 0x0F0F0000 is not, since all 1s are not contiguous.
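
As a standalone illustration of the property isRunOfOnes accepts (this is not the selector's implementation): a 32-bit value is a single run of 1s, possibly wrapping from LSB to MSB, exactly when either the value or its complement is one non-wrapping run of 1s.

    // Standalone illustration of the "contiguous, possibly wrapping, run of 1s" test.
    #include <cassert>
    #include <cstdint>

    static bool isContiguousRun(uint32_t V) {
      // Adding the lowest set bit carries across a single run and clears it.
      return V != 0 && ((V + (V & (0u - V))) & V) == 0;
    }

    static bool isRunOfOnesSketch(uint32_t V) {
      // A wrapping run is the complement of a non-wrapping run of 1s.
      return isContiguousRun(V) || (V != 0 && isContiguousRun(~V));
    }

    int main() {
      assert(isRunOfOnesSketch(0x0000FFF0));   // plain run
      assert(isRunOfOnesSketch(0x0000FFFF));   // run touching the LSB
      assert(isRunOfOnesSketch(0xFF0000FF));   // run wrapping from LSB to MSB
      assert(!isRunOfOnesSketch(0x0F0F0000));  // two separate runs: rejected
      return 0;
    }
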
@@ -87,15 +86,15 @@ namespace {
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
unsigned &SH, unsigned &MB, unsigned &ME);
-
+
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
-
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *Select(SDNode *N);
-
+
SDNode *SelectBitfieldInsert(SDNode *N);
/// SelectCC - Select a comparison of the specified values with the
@@ -104,42 +103,39 @@ namespace {
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
- bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp,
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
}
-
+
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Because preinc imms have already been validated, just
/// accept it.
- bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const {
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
Out = N;
return true;
}
-
+
    /// SelectAddrIdx - Given the specified address, check to see if it can be
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
- bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
}
-
+
    /// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
- bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Index) {
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
/// SelectAddrImmShift - Returns true if the address N can be represented by
/// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
/// for use by STD and friends.
- bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp,
- SDValue &Base) {
+ bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
@@ -151,29 +147,16 @@ namespace {
OutOps.push_back(Op);
return false;
}
-
- SDValue BuildSDIVSequence(SDNode *N);
- SDValue BuildUDIVSequence(SDNode *N);
-
+
void InsertVRSaveCode(MachineFunction &MF);
virtual const char *getPassName() const {
return "PowerPC DAG->DAG Pattern Instruction Selection";
- }
-
- /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
- /// this target when scheduling the DAG.
- virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *II = TM.getInstrInfo();
- assert(II && "No InstrInfo?");
- return new PPCHazardRecognizer970(*II);
}
// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"
-
+
private:
SDNode *SelectSETCC(SDNode *N);
};
@@ -184,19 +167,20 @@ private:
/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Check to see if this function uses vector registers, which means we have to
- // save and restore the VRSAVE register and update it with the regs we use.
+ // save and restore the VRSAVE register and update it with the regs we use.
//
// In this case, there will be virtual registers of vector type created
// by the scheduler. Detect them now.
bool HasVectorVReg = false;
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = RegInfo->getLastVirtReg()+1; i != e; ++i)
- if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) {
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
HasVectorVReg = true;
break;
}
+ }
if (!HasVectorVReg) return; // nothing to do.
-
+
// If we have a vector register, we want to emit code into the entry and exit
// blocks to save and restore the VRSAVE register. We do this here (instead
// of marking all vector instructions as clobbering VRSAVE) for two reasons:
@@ -211,7 +195,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// function and one for the value after having bits or'd into it.
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
-
+
const TargetInstrInfo &TII = *TM.getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
@@ -224,21 +208,21 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
UpdatedVRSAVE).addReg(InVRSAVE);
BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
-
+
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
if (!BB->empty() && BB->back().getDesc().isReturn()) {
IP = BB->end(); --IP;
-
+
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
IP = I2;
-
+
// Emit: MTVRSAVE InVRSave
BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
- }
+ }
}
}
@@ -344,8 +328,8 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
- bool isShiftMask, unsigned &SH,
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
// Don't even go down this path for i64, since different logic will be
// necessary for rldicl/rldicr/rldimi.
@@ -358,13 +342,13 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
if (N->getNumOperands() != 2 ||
!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
return false;
-
+
if (Opcode == ISD::SHL) {
// apply shift left to mask if it comes first
if (isShiftMask) Mask = Mask << Shift;
// determine which bits are made indeterminant by shift
Indeterminant = ~(0xFFFFFFFFu << Shift);
- } else if (Opcode == ISD::SRL) {
+ } else if (Opcode == ISD::SRL) {
// apply shift right to mask if it comes first
if (isShiftMask) Mask = Mask >> Shift;
// determine which bits are made indeterminant by shift
@@ -376,7 +360,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
} else {
return false;
}
-
+
// if the mask doesn't intersect any Indeterminant bits
if (Mask && !(Mask & Indeterminant)) {
SH = Shift & 31;
@@ -392,14 +376,14 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
-
+
APInt LKZ, LKO, RKZ, RKO;
CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
-
+
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
-
+
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
unsigned Op0Opc = Op0.getOpcode();
unsigned Op1Opc = Op1.getOpcode();
@@ -427,7 +411,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
std::swap(TargetMask, InsertMask);
}
}
-
+
unsigned MB, ME;
if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
SDValue Tmp1, Tmp2;
@@ -463,7 +447,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
ISD::CondCode CC, DebugLoc dl) {
// Always select the LHS.
unsigned Opc;
-
+
if (LHS.getValueType() == MVT::i32) {
unsigned Imm;
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
@@ -476,11 +460,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
-
+
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
// lis r2, 4660
- // ori r2, r2, 22136
+ // ori r2, r2, 22136
// cmpw cr0, r3, r2
// Since we are just comparing for equality, we can emit this instead:
// xoris r0,r3,0x1234
@@ -517,11 +501,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
-
+
// For non-equality comparisons, the default code would materialize the
// constant, then compare against it, like this:
// lis r2, 4660
- // ori r2, r2, 22136
+ // ori r2, r2, 22136
// cmpd cr0, r3, r2
// Since we are just comparing for equality, we can emit this instead:
// xoris r0,r3,0x1234
@@ -610,9 +594,9 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
case ISD::SETUNE:
case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
- case ISD::SETUEQ:
- case ISD::SETOGE:
- case ISD::SETOLE:
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
case ISD::SETONE:
llvm_unreachable("Invalid branch code: should be expanded by legalize");
// These are invalid for floating point. Assume integer.
@@ -641,9 +625,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
case ISD::SETNE: {
SDValue AD =
- SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U)), 0);
- return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
AD.getValue(1));
}
case ISD::SETLT: {
@@ -663,16 +647,16 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
switch (CC) {
default: break;
case ISD::SETEQ:
- Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(1)), 0);
- return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
- SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
MVT::i32,
getI32Imm(0)), 0),
Op.getValue(1));
case ISD::SETNE: {
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
- SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
Op, SDValue(AD, 1));
@@ -687,35 +671,35 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
0);
- return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
getI32Imm(1));
}
}
}
}
-
+
bool Inv;
int OtherCondIdx;
unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
SDValue IntCR;
-
+
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
-
+
SDValue InFlag(0, 0); // Null incoming flag value.
- CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
-
+
if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
else
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
CR7Reg, CCReg), 0);
-
+
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
if (OtherCondIdx == -1 && !Inv)
@@ -734,7 +718,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Get the other bit of the comparison.
Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
- SDValue OtherCond =
+ SDValue OtherCond =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
@@ -750,7 +734,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
-
+
case ISD::Constant: {
if (N->getValueType(0) == MVT::i64) {
// Get 64 bit value.
@@ -759,12 +743,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Remainder = 0;
// Assume no shift required.
unsigned Shift = 0;
-
+
// If it can't be represented as a 32 bit value.
if (!isInt<32>(Imm)) {
Shift = CountTrailingZeros_64(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
-
+
// If the shifted value fits 32 bits.
if (isInt<32>(ImmSh)) {
// Go with the shifted value.
@@ -776,14 +760,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
Imm >>= 32;
}
}
-
+
// Intermediate operand.
SDNode *Result;
// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;
unsigned Hi = (Imm >> 16) & 0xFFFF;
-
+
// Simple value.
if (isInt<16>(Imm)) {
// Just the Lo bits.
@@ -799,7 +783,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Just the Hi bits.
Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
}
-
+
// If no shift, we're done.
if (!Shift) return Result;
@@ -815,22 +799,22 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if ((Hi = (Remainder >> 16) & 0xFFFF)) {
Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
SDValue(Result, 0), getI32Imm(Hi));
- }
+ }
if ((Lo = Remainder & 0xFFFF)) {
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
SDValue(Result, 0), getI32Imm(Lo));
}
-
+
return Result;
}
break;
}
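
The ISD::Constant case above builds a 64-bit immediate from 16-bit pieces: a (possibly shifted) 32-bit head built with lis/ori, an optional rldicr-style left shift, and a remainder OR'd back in with oris/ori. The following standalone sketch only models the value decomposition, not the node construction, and uses __builtin_ctzll in place of CountTrailingZeros_64:

    // Standalone sketch of the 64-bit immediate decomposition; value arithmetic only.
    #include <cassert>
    #include <cstdint>
    #include <climits>

    static uint64_t materialize64(int64_t Imm) {
      unsigned Shift = 0;
      uint32_t Remainder = 0;
      if (Imm < INT32_MIN || Imm > INT32_MAX) {        // needs more than 32 bits
        Shift = __builtin_ctzll((uint64_t)Imm);        // CountTrailingZeros_64
        int64_t ImmSh = (int64_t)((uint64_t)Imm >> Shift);
        if (ImmSh >= INT32_MIN && ImmSh <= INT32_MAX) {
          Imm = ImmSh;                                 // shifted value fits: shift later
        } else {
          Shift = 32;                                  // otherwise split at bit 32
          Remainder = (uint32_t)(uint64_t)Imm;         // low word, OR'd back at the end
          Imm >>= 32;                                  // high word becomes the head
        }
      }
      uint64_t Result = (uint64_t)Imm;                 // sign-extended head (li/lis/ori)
      if (Shift) {
        Result <<= Shift;                              // rldicr
        Result |= Remainder;                           // oris/ori of the remaining bits
      }
      return Result;
    }

    int main() {
      assert(materialize64(0x12345678ABCD0000LL) == 0x12345678ABCD0000ULL);
      assert(materialize64((int64_t)0xFFFF000000000000ULL) == 0xFFFF000000000000ULL);
      assert(materialize64(-1) == 0xFFFFFFFFFFFFFFFFULL);
      assert(materialize64(0x7FFFFFFF00000000LL) == 0x7FFFFFFF00000000ULL);
      return 0;
    }
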
-
+
case ISD::SETCC:
return SelectSETCC(N);
case PPCISD::GlobalBaseReg:
return getGlobalBaseReg();
-
+
case ISD::FrameIndex: {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
@@ -852,11 +836,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
N->getOperand(0), InFlag);
}
-
+
case ISD::SDIV: {
// FIXME: since this depends on the setting of the carry flag from the srawi
// we should really be making notes about that for the scheduler.
- // FIXME: It sure would be nice if we could cheaply recognize the
+ // FIXME: It sure would be nice if we could cheaply recognize the
// srl/add/sra pattern the dag combiner will generate for this as
// sra/addze rather than having to handle sdiv ourselves. oh well.
unsigned Imm;
@@ -864,13 +848,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue N0 = N->getOperand(0);
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
SDNode *Op =
- CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(Imm)));
- return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(Op, 0), SDValue(Op, 1));
} else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
SDNode *Op =
- CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
N0, getI32Imm(Log2_32(-Imm)));
SDValue PT =
SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
@@ -879,24 +863,24 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
}
}
-
+
// Other cases are autogenerated.
break;
}
-
+
case ISD::LOAD: {
// Handle preincrement loads.
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
-
+
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC)
break;
-
+
SDValue Offset = LD->getOffset();
if (isa<ConstantSDNode>(Offset) ||
Offset.getOpcode() == ISD::TargetGlobalAddress) {
-
+
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
if (LD->getValueType(0) != MVT::i64) {
@@ -923,7 +907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case MVT::i8: Opcode = PPC::LBZU8; break;
}
}
-
+
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
@@ -935,7 +919,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
llvm_unreachable("R+R preindex loads not supported yet!");
}
}
-
+
case ISD::AND: {
unsigned Imm, Imm2, SH, MB, ME;
@@ -950,7 +934,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// If this is just a masked value where the input is not handled above, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
if (isInt32Immediate(N->getOperand(1), Imm) &&
- isRunOfOnes(Imm, MB, ME) &&
+ isRunOfOnes(Imm, MB, ME) &&
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
@@ -963,7 +947,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
// ISD::OR doesn't get all the bitfield insertion fun.
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
- if (isInt32Immediate(N->getOperand(1), Imm) &&
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
N->getOperand(0).getOpcode() == ISD::OR &&
isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
unsigned MB, ME;
@@ -975,7 +959,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
}
}
-
+
// Other cases are autogenerated.
break;
}
@@ -983,7 +967,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (N->getValueType(0) == MVT::i32)
if (SDNode *I = SelectBitfieldInsert(N))
return I;
-
+
// Other cases are autogenerated.
break;
case ISD::SHL: {
@@ -994,25 +978,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
-
+
// Other cases are autogenerated.
break;
}
case ISD::SRL: {
unsigned Imm, SH, MB, ME;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
- isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
-
+
// Other cases are autogenerated.
break;
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
+
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
@@ -1022,7 +1006,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
- CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
N->getOperand(0), getI32Imm(~0U));
return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
SDValue(Tmp, 0), N->getOperand(0),
@@ -1064,7 +1048,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
- SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
N->getOperand(4), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
}
@@ -1078,13 +1062,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
}
}
-
+
return SelectCode(N);
}
-/// createPPCISelDag - This pass converts a legalized DAG into a
+/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 14d1b15..8f623b8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -38,17 +38,17 @@
#include "llvm/DerivedTypes.h"
using namespace llvm;
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
- EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
- EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
@@ -73,6 +73,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+
// Set up the register classes.
addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
@@ -174,10 +178,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -545,7 +549,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 8, 24);
@@ -554,7 +558,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 0, 16);
@@ -569,7 +573,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
"PPC only supports shuffles by bytes!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-
+
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
@@ -607,7 +611,7 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
-
+
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
@@ -635,7 +639,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
APInt APVal, APUndef;
unsigned BitSize;
bool HasAnyUndefs;
-
+
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
@@ -1054,7 +1058,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
VT = LD->getMemoryVT();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- ST = ST;
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
} else
@@ -1094,158 +1097,126 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) const {
- EVT PtrVT = Op.getValueType();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- SDValue Zero = DAG.getConstant(0, PtrVT);
- // FIXME there isn't really any debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- const TargetMachine &TM = DAG.getTarget();
-
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
- SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
-
- // If this is a non-darwin platform, we don't support non-static relo models
- // yet.
- if (TM.getRelocationModel() == Reloc::Static ||
- !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
- // Generate non-pic code that has direct accesses to the constant pool.
- // The address of the global is just (hi(&g)+lo(&g)).
- return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ HiOpFlags = PPCII::MO_HA16;
+ LoOpFlags = PPCII::MO_LO16;
+
+ // Don't use the pic base if not in PIC relocation model. Or if we are on a
+ // non-darwin platform. We don't support PIC on other platforms yet.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<PPCSubtarget>().isDarwin();
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
}
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // With PIC, the first instruction is actually "GR+hi(&G)".
- Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(PPCISD::GlobalBaseReg,
- DebugLoc(), PtrVT), Hi);
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
}
- Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
- return Lo;
+ return isPIC;
}
-SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- // FIXME there isn't really any debug loc here
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = HiPart.getDebugLoc();
- const TargetMachine &TM = DAG.getTarget();
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
- SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
- // If this is a non-darwin platform, we don't support non-static relo models
- // yet.
- if (TM.getRelocationModel() == Reloc::Static ||
- !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
- // Generate non-pic code that has direct accesses to the constant pool.
- // The address of the global is just (hi(&g)+lo(&g)).
- return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
- }
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
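
LowerLabelRef builds the address as hi(&g) + lo(&g), with GetLabelAccessInfo choosing MO_HA16/MO_LO16 flags. The usual PowerPC convention is that the low half is a signed 16-bit quantity, so the high half is the "high-adjusted" value that compensates for that sign. A standalone, illustrative-only sketch of that arithmetic:

    // Illustrative @ha/@l split; not the lowering code above.
    #include <cassert>
    #include <cstdint>

    static uint32_t ha16(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
    static int16_t  lo16(uint32_t Addr) { return (int16_t)(Addr & 0xFFFF); }

    int main() {
      for (uint32_t Addr : {0x12345678u, 0x1234FFFCu, 0x00008000u}) {
        // addis builds ha16 << 16; the lo16 part is a signed displacement.
        uint32_t Rebuilt = (ha16(Addr) << 16) + (uint32_t)(int32_t)lo16(Addr);
        assert(Rebuilt == Addr);
      }
      return 0;
    }

In the PIC path only the Hi node gets the base register added, which is consistent with the low half being folded into the displacement of the following add or memory operation.
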
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // With PIC, the first instruction is actually "GR+hi(&G)".
- Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(PPCISD::GlobalBaseReg,
- DebugLoc(), PtrVT), Hi);
- }
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
- Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
- return Lo;
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}
-SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) const {
- llvm_unreachable("TLS not implemented for PPC.");
- return SDValue(); // Not reached
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true);
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero);
- SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero);
-
- // If this is a non-darwin platform, we don't support non-static relo models
- // yet.
- const TargetMachine &TM = DAG.getTarget();
- if (TM.getRelocationModel() == Reloc::Static ||
- !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
- // Generate non-pic code that has direct accesses to globals.
- // The address of the global is just (hi(&g)+lo(&g)).
- return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
- }
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // With PIC, the first instruction is actually "GR+hi(&G)".
- Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
- DAG.getNode(PPCISD::GlobalBaseReg,
- DebugLoc(), PtrVT), Hi);
- }
-
- return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
+ SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- // FIXME there isn't really any debug info here
- DebugLoc dl = GSDN->getDebugLoc();
+ DebugLoc DL = GSDN->getDebugLoc();
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset());
- SDValue Zero = DAG.getConstant(0, PtrVT);
-
- const TargetMachine &TM = DAG.getTarget();
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
- return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
- SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
- SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
-
- // If this is a non-darwin platform, we don't support non-static relo models
- // yet.
- if (TM.getRelocationModel() == Reloc::Static ||
- !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
- // Generate non-pic code that has direct accesses to globals.
- // The address of the global is just (hi(&g)+lo(&g)).
- return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
- }
-
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // With PIC, the first instruction is actually "GR+hi(&G)".
- Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(PPCISD::GlobalBaseReg,
- DebugLoc(), PtrVT), Hi);
- }
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
- Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
- if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM))
- return Lo;
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
- // If the global is weak or external, we have to go through the lazy
- // resolution stub.
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0,
- false, false, 0);
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, 0);
+ return Ptr;
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -1353,7 +1324,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
false, false, 0);
}
@@ -1406,43 +1378,47 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// Store first byte : number of int regs
SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
- Op.getOperand(1), SV, 0, MVT::i8,
- false, false, 0);
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
uint64_t nextOffset = FPROffset;
SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
ConstFPROffset);
// Store second byte : number of float regs
SDValue secondStore =
- DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8,
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
false, false, 0);
nextOffset += StackOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
// Store second word : arguments given on stack
SDValue thirdStore =
- DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset,
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
false, false, 0);
nextOffset += FrameOffset;
nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
// Store third word : arguments given in registers
- return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset,
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
false, false, 0);
}
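
The four stores above populate the 32-bit SVR4 va_list record: a GPR count byte, an FPR count byte, a pointer to the overflow (stack) argument area, and a pointer to the register save area. Sketched as a C++ struct, with field names that are illustrative rather than taken from the patch:

    // Illustrative layout of the 32-bit SVR4 va_list the stores above fill in.
    struct SVR4VAListSketch {
      unsigned char gpr;            // byte 0: index of the next general-purpose arg reg
      unsigned char fpr;            // byte 1: index of the next floating-point arg reg
      unsigned short reserved;      // padding up to the first pointer
      void *overflow_arg_area;      // arguments that were passed on the stack
      void *reg_save_area;          // spill slots holding the argument registers
    };
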
#include "PPCGenCallingConv.inc"
-static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
return true;
}
-static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
- EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -1451,7 +1427,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
+
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
// Skip one register if the first unallocated register has an even register
@@ -1461,15 +1437,15 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
if (RegNum != NumArgRegs && RegNum % 2 == 1) {
State.AllocateReg(ArgRegs[RegNum]);
}
-
+
// Always return false here, as this function only makes sure that the first
// unallocated register has an odd register number and does not actually
// allocate a register for the current argument.
return false;
}
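
The skip performed by CC_PPC_SVR4_Custom_AlignArgRegs keeps 64-bit arguments in aligned GPR pairs (r3/r4, r5/r6, r7/r8, r9/r10). A standalone sketch of that rule, modelled as an index bump rather than the CCState allocation the callback actually performs:

    // Standalone sketch of the register-pair alignment rule; not the CC callback itself.
    #include <cstdio>

    // ArgRegs[0..7] correspond to r3..r10; a 64-bit argument must start at an even
    // index into the table, i.e. in r3, r5, r7 or r9.
    static unsigned alignTo64BitRegPair(unsigned NextIdx, unsigned NumArgRegs = 8) {
      if (NextIdx != NumArgRegs && (NextIdx % 2) == 1)
        ++NextIdx;                  // burn one GPR to restore pair alignment
      return NextIdx;
    }

    int main() {
      std::printf("%u %u %u\n",
                  alignTo64BitRegPair(0),   // already aligned: r3/r4
                  alignTo64BitRegPair(1),   // skip r4, start at r5
                  alignTo64BitRegPair(3));  // skip r6, start at r7
      return 0;
    }
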
-static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
- EVT &LocVT,
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
@@ -1479,7 +1455,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
};
const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
+
unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
// If there is only one Floating-point register left we need to put both f64
@@ -1487,7 +1463,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
State.AllocateReg(ArgRegs[RegNum]);
}
-
+
// Always return false here, as this function only makes sure that the two f64
// values a ppc_fp128 value is split into are both passed in registers or both
// passed on the stack and does not actually allocate a register for the
@@ -1572,7 +1548,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Specifications:
// System V Application Binary Interface PowerPC Processor Supplement
// AltiVec Technology Programming Interface Manual
-
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -1588,18 +1564,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4);
-
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
+
// Arguments stored in registers.
if (VA.isRegLoc()) {
TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
-
+
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
@@ -1619,9 +1595,9 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
RC = PPC::VRRCRegisterClass;
break;
}
-
+
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
InVals.push_back(ArgValue);
@@ -1635,7 +1611,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
false, false, 0));
}
}
@@ -1654,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
-
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
@@ -1663,17 +1640,17 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
MinReservedArea =
std::max(MinReservedArea,
- PPCFrameInfo::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ PPCFrameLowering::getMinCallFrameSize(false, false));
+
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
getStackAlignment();
unsigned AlignMask = TargetAlign-1;
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
+
FI->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
-
+
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
@@ -1705,28 +1682,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- // The fixed integer arguments of a variadic function are
- // stored to the VarArgsFrameIndex on the stack.
- unsigned GPRIndex = 0;
- for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) {
- SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
- SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
- false, false, 0);
- MemOps.push_back(Store);
- // Increment the address by four for the next argument to store
- SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
- FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
- }
-
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing the
- // result of va_next.
- for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
- unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1735,27 +1702,17 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
// is set.
-
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
- unsigned FPRIndex = 0;
- for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) {
- SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
- SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
- false, false, 0);
- MemOps.push_back(Store);
- // Increment the address by eight for the next argument to store
- SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
- PtrVT);
- FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
- }
-
- for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
- unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by eight for the next argument to store
SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
@@ -1791,7 +1748,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -1915,18 +1872,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- NULL, 0,
+ MachinePointerInfo(),
ObjSize==1 ? MVT::i8 : MVT::i16,
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
-
+
ArgOffset += PtrByteSize;
-
+
continue;
}
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
@@ -1934,11 +1891,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// to memory. ArgVal will be address of the beginning of
// the object.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -1956,7 +1914,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
++GPR_idx;
} else {
@@ -1970,7 +1928,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// FALLTHROUGH
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32) {
@@ -2008,9 +1966,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg;
if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
@@ -2028,7 +1986,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
@@ -2063,7 +2021,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset + (ArgSize - ObjSize),
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, 0);
}
@@ -2082,8 +2040,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
}
MinReservedArea =
std::max(MinReservedArea,
- PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
getStackAlignment();
unsigned AlignMask = TargetAlign-1;
MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
@@ -2104,15 +2062,15 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// result of va_next.
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
unsigned VReg;
-
+
if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
// Increment the address by four for the next argument to store
SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -2141,7 +2099,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
+ unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned NumOps = Outs.size();
unsigned PtrByteSize = isPPC64 ? 8 : 4;
@@ -2153,7 +2111,6 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// 16-byte aligned.
nAltivecParamsAtEnd = 0;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
@@ -2183,11 +2140,11 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
NumBytes = std::max(NumBytes,
- PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
+ PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
@@ -2292,8 +2249,8 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
int FI = TailCallArgs[i].FrameIdx;
// Store relative to framepointer.
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
- PseudoSourceValue::getFixedStack(FI),
- 0, false, false, 0));
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
}
}
@@ -2311,26 +2268,26 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
- int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewRetAddr), 0,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
false, false, 0);
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
int NewFPLoc =
- SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- PseudoSourceValue::getFixedStack(NewFPIdx), 0,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
false, false, 0);
}
}
@@ -2369,15 +2326,15 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
- LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0,
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
-
+
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
- FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0,
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
@@ -2397,7 +2354,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
DebugLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, NULL, 0, NULL, 0);
+ false, false, MachinePointerInfo(0),
+ MachinePointerInfo(0));
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -2407,7 +2365,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+ SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
DebugLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
@@ -2420,8 +2378,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
}
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
// Calculate and remember argument location.
} else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
TailCallArguments);
@@ -2460,10 +2418,14 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
- bool isPPC64, bool isSVR4ABI) {
+ const PPCSubtarget &PPCSubTarget) {
+
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
@@ -2473,24 +2435,49 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Callee = SDValue(Dest, 0);
needIndirectCall = false;
}
- // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
- // Use indirect calls for ALL functions calls in JIT mode, since the
- // far-call stubs may be outside relocation limits for a BL instruction.
- if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ PPCSubTarget.getDarwinVers() < 9 &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType());
+ Callee.getValueType(),
+ 0, OpFlags);
needIndirectCall = false;
}
}
+
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
- Callee.getValueType());
- needIndirectCall = false;
+ unsigned char OpFlags = 0;
+
+ if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ PPCSubTarget.getDarwinVers() < 9) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_DARWIN_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
}
+
if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
@@ -2525,7 +2512,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// Load the address of the function entry point from the function
// descriptor.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Flag);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
InFlag.getNode() ? 3 : 2);
Chain = LoadFuncPtr.getValue(1);
@@ -2552,7 +2539,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// prevents the register allocator from allocating it), resulting in an
// additional register being allocated and an unnecessary move instruction
// being generated.
- VTs = DAG.getVTList(MVT::Other, MVT::Flag);
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
Callee, InFlag);
Chain = LoadTOCPtr.getValue(0);
@@ -2569,7 +2556,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.clear();
NodeTys.push_back(MVT::Other);
- NodeTys.push_back(MVT::Flag);
+ NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
Callee.setNode(0);
@@ -2637,8 +2624,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget.isPPC64(),
- PPCSubTarget.isSVR4ABI());
+ PPCSubTarget);
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
@@ -2684,7 +2670,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -2699,7 +2685,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
InFlag = Chain.getValue(1);
} else {
// Otherwise insert NOP.
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
}
}
@@ -2726,15 +2712,14 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
- } else {
- return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
- }
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
}
SDValue
@@ -2763,7 +2748,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// in this function's (MF) stack pointer stack slot 0(SP).
if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
-
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, parameter list area and the part of the local variable space which
// contains copies of aggregates which are passed by value.
@@ -2774,19 +2759,19 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
// Fixed vector arguments go into registers as long as registers are
// available. Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
-
+
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Outs[i].VT;
+ MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
-
+
if (Outs[i].IsFixed) {
Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
CCInfo);
@@ -2794,11 +2779,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo);
}
-
+
if (Result) {
#ifndef NDEBUG
errs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString() << "\n";
+ << EVT(ArgVT).getEVTString() << "\n";
#endif
llvm_unreachable(0);
}
@@ -2807,7 +2792,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// All arguments are treated the same.
CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4);
}
-
+
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
@@ -2822,7 +2807,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// space variable where copies of aggregates which are passed by value are
// stored.
unsigned NumBytes = CCByValInfo.getNextStackOffset();
-
+
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
@@ -2842,7 +2827,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// arguments that may not fit in the registers available for argument
// passing.
SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
-
+
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
@@ -2854,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
-
+
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
// create a copy of it in the local variable space of the current stack
@@ -2863,33 +2848,33 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
assert((j < ByValArgLocs.size()) && "Index out of bounds!");
CCValAssign &ByValVA = ByValArgLocs[j++];
assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
-
+
// Memory reserved in the local variable space of the callers stack frame.
unsigned LocMemOffset = ByValVA.getLocMemOffset();
-
+
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-
+
// Create a copy of the argument in the local area of the current
// stack frame.
SDValue MemcpyCall =
CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
-
+
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
CallSeqStart.getNode()->getOperand(1));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
-
+
// Pass the address of the aggregate copy on the stack either in a
// physical register or in the parameter list area of the current stack
// frame to the callee.
Arg = PtrOff;
}
-
+
if (VA.isRegLoc()) {
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -2903,7 +2888,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo(),
false, false, 0));
} else {
// Calculate and remember argument location.
@@ -2912,11 +2897,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
}
}
}
-
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
-
+
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -2925,7 +2910,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
-
+
// Set CR6 to true if this is a vararg call.
if (isVarArg) {
SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
@@ -2933,10 +2918,9 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
- if (isTailCall) {
+ if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- }
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
@@ -3012,7 +2996,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const unsigned GPR_32[] = { // 32-bit registers.
@@ -3066,8 +3050,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Everything else is passed left-justified.
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg,
- NULL, 0, VT, false, false, 0);
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3104,7 +3089,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0,
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3136,21 +3122,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
if (isVarArg) {
- SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
- false, false, 0);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0,
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3194,11 +3181,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// entirely in R registers. Maybe later.
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
DAG.getConstant(ArgOffset, PtrVT));
- SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
if (VR_idx != NumVRs) {
- SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0,
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
@@ -3209,7 +3197,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
break;
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0,
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3275,14 +3263,14 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// TOC save area offset.
SDValue PtrOff = DAG.getIntPtrConstant(40);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0,
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
}
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
- if (!isTailCall &&
+ if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
!dyn_cast<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG))
@@ -3298,10 +3286,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
- if (isTailCall) {
+ if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- }
return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
@@ -3362,14 +3349,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue SaveSP = Op.getOperand(1);
// Load the old link SP.
- SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0,
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
// Store the old link SP.
- return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0,
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
false, false, 0);
}
@@ -3390,7 +3378,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
// Find out what the fix offset of the frame pointer save area.
- int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
// Save the result.
@@ -3414,7 +3402,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out what the fix offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
@@ -3533,7 +3521,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- PPCISD::FCTIDZ,
+ PPCISD::FCTIDZ,
dl, MVT::f64, Src);
break;
case MVT::i64:
@@ -3545,15 +3533,15 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
// Emit a store to the stack slot.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0,
- false, false, 0);
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot. If loading 4 bytes, make sure to
// add in a bias.
if (Op.getValueType() == MVT::i32)
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
false, false, 0);
}
@@ -3565,8 +3553,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
- MVT::f64, Op.getOperand(0));
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
if (Op.getValueType() == MVT::f32)
FP = DAG.getNode(ISD::FP_ROUND, dl,
@@ -3591,14 +3578,15 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// STD the extended value into the stack slot.
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, 0, 8, 8);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 8, 8);
SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
SDValue Store =
DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
Ops, 4, MVT::i64, MMO);
// Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0);
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
+ false, false, 0);
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3637,19 +3625,19 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Save FP Control Word to register
NodeTys.push_back(MVT::f64); // return register
- NodeTys.push_back(MVT::Flag); // unused in this context
+ NodeTys.push_back(MVT::Glue); // unused in this context
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(), false, false,0);
// Load FP Control Word from low 32 bits of stack slot.
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
- SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0,
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
false, false, 0);
// Transform as necessary
@@ -3786,7 +3774,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
&Ops[0], Ops.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
@@ -3815,14 +3803,14 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Force LHS/RHS to be the right type.
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
- RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
int Ops[16];
for (unsigned i = 0; i != 16; ++i)
Ops[i] = i + Amt;
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
// If this is a case we can't handle, return null and let the default
@@ -3856,7 +3844,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
SDValue Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
}
return Op;
}
@@ -3875,7 +3863,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -3891,7 +3879,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// xor by OnesV to invert it.
Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// Check to see if this is a wide variety of vsplti*, binop self cases.
@@ -3917,7 +3905,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Intrinsic::ppc_altivec_vslw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + srl self.
@@ -3928,7 +3916,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Intrinsic::ppc_altivec_vsrw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + sra self.
@@ -3939,7 +3927,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Intrinsic::ppc_altivec_vsraw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// vsplti + rol self.
@@ -3951,7 +3939,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
Intrinsic::ppc_altivec_vrlw
};
Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// t = vsplti c, result = vsldoi t, t, 1
@@ -3978,14 +3966,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
}
// Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
if (SextVal >= -31 && SextVal <= 0) {
SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
}
return SDValue();
@@ -4062,10 +4050,10 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
EVT VT = OpLHS.getValueType();
- OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
- OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
@@ -4118,7 +4106,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// perfect shuffle table to emit an optimal matching sequence.
SmallVector<int, 16> PermMask;
SVOp->getMask(PermMask);
-
+
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
@@ -4253,7 +4241,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(CompareOpc, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
}
// Create the PPCISD altivec 'dot' comparison node.
@@ -4264,7 +4252,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
};
std::vector<EVT> VTs;
VTs.push_back(Op.getOperand(2).getValueType());
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Now that we have the comparison, emit a copy from the CR to a GPR.
@@ -4317,10 +4305,10 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
// Store the input value into Value#0 of the stack slot.
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Op.getOperand(0), FIdx, NULL, 0,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
false, false, 0);
// Load it out.
- return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
false, false, 0);
}
@@ -4336,9 +4324,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
// Shrinkify inputs to v8i16.
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
- RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
- RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
// Low parts multiplied together, generating 32-bit results (we ignore the
// top parts).
@@ -4364,12 +4352,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
LHS, RHS, DAG, dl, MVT::v8i16);
- EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
// Multiply the odd 8-bit parts, producing 16-bit sums.
SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
LHS, RHS, DAG, dl, MVT::v8i16);
- OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
// Merge the results together.
int Ops[16];
@@ -4391,7 +4379,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
@@ -4456,20 +4444,20 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
NodeTys.push_back(MVT::f64); // Return register
- NodeTys.push_back(MVT::Flag); // Returns a flag for later insns
+ NodeTys.push_back(MVT::Glue); // Returns a flag for later insns
Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
MFFSreg = Result.getValue(0);
InFlag = Result.getValue(1);
NodeTys.clear();
- NodeTys.push_back(MVT::Flag); // Returns a flag
+ NodeTys.push_back(MVT::Glue); // Returns a flag
Ops[0] = DAG.getConstant(31, MVT::i32);
Ops[1] = InFlag;
Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
InFlag = Result.getValue(0);
NodeTys.clear();
- NodeTys.push_back(MVT::Flag); // Returns a flag
+ NodeTys.push_back(MVT::Glue); // Returns a flag
Ops[0] = DAG.getConstant(30, MVT::i32);
Ops[1] = InFlag;
Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
@@ -4477,7 +4465,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
NodeTys.clear();
NodeTys.push_back(MVT::f64); // result of add
- NodeTys.push_back(MVT::Flag); // Returns a flag
+ NodeTys.push_back(MVT::Glue); // Returns a flag
Ops[0] = Lo;
Ops[1] = Hi;
Ops[2] = InFlag;
@@ -5283,7 +5271,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, MVT::i32)
};
VTs.push_back(LHS.getOperand(2).getValueType());
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
// Unpack the result based on how the target uses it.
@@ -5377,6 +5365,47 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
@@ -5536,19 +5565,19 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
-
- DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI),
+
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
isPPC64? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0, false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5571,7 +5600,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
- FrameAddr, NULL, 0, false, false, 0);
+ FrameAddr, MachinePointerInfo(), false, false, 0);
return FrameAddr;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 700816f..80cab75 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -308,6 +308,12 @@ namespace llvm {
bool is8bit, unsigned Opcode) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -383,7 +389,6 @@ namespace llvm {
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
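
The getSingleConstraintMatchWeight hook declared above (and defined earlier in this diff) scores how well a call operand's type matches each PowerPC inline-asm constraint letter: 'b' integer base register, 'f'/'d' single/double FPRs, 'v' Altivec vectors, 'y' condition registers. For orientation, here is a hedged user-level sketch of the kind of constraints being weighted; it uses GCC-style extended asm, only compiles when targeting PowerPC, and the instruction strings are ordinary PPC ops chosen purely for illustration.

    // 'f' asks for a floating-point register.
    double scale_by_two(double x) {
      double r;
      asm("fadd %0, %1, %1" : "=f"(r) : "f"(x));
      return r;
    }

    // 'b' asks for a base register (any GPR except r0, since r0 means
    // "literal zero" in the RA slot of addi and friends).
    long add_offset(long p) {
      long r;
      asm("addi %0, %1, 8" : "=b"(r) : "b"(p));
      return r;
    }
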
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a0781b9..6636b69 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -23,9 +23,11 @@ def u16imm64 : Operand<i64> {
}
def symbolHi64 : Operand<i64> {
let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
}
def symbolLo64 : Operand<i64> {
let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
}
//===----------------------------------------------------------------------===//
@@ -58,7 +60,7 @@ def HI48_64 : SDNodeXForm<imm, [{
//
let Defs = [LR8] in
- def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "", []>,
PPC970_Unit_BRU;
// Darwin ABI Calls.
@@ -130,39 +132,31 @@ def : Pat<(PPCnop),
let usesCustomInserter = 1 in {
let Uses = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
- "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
- "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "",
[(set G8RC:$dst,
(atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
- "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "",
[(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
}
}
@@ -240,8 +234,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
}
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),
- "${:comment} DYNALLOC8 $result, $negsize, $fpsi",
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"",
[(set G8RC:$result,
(PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
@@ -500,7 +493,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
// Update forms.
let mayLoad = 1 in
-def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
"lhau $rD, $disp($rA)", LdStGeneral,
[]>, RegConstraint<"$rA = $ea_result">,
@@ -555,18 +548,20 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
[(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
-def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
- "ld $rD, $disp($reg)", LdStLD,
- [(set G8RC:$rD,
+def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "",
+ [(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-let RST = 2, DS = 8 in
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
[(PPCload_toc G8RC:$reg)]>, isPPC64;
-let RST = 2, DS = 40, RA = 1 in
+
+let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
- []>, isPPC64;
+ [(PPCtoc_restore)]>, isPPC64;
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
@@ -579,8 +574,6 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr
}
-def : Pat<(PPCtoc_restore),
- (LDtoc_restore)>;
def : Pat<(PPCload ixaddr:$src),
(LD ixaddr:$src)>;
def : Pat<(PPCload xaddr:$src),
@@ -621,14 +614,14 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
let PPC970_Unit = 2 in {
-def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stbu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"sthu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res,
@@ -636,8 +629,8 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
+def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
"stdu $rS, $ptroff($ptrreg)", LdStSTD,
[(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
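
The EncoderMethod hooks added to symbolHi64/symbolLo64 above (getHA16Encoding / getLO16Encoding) name the routines that emit the high-adjusted and low halves of a symbol address; those routines presumably live in the PPC MC code emitter and are not part of this diff. The one subtlety the @ha/@l split must get right is the carry caused by the sign-extended low half, which this self-contained sketch checks:

    #include <cassert>
    #include <cstdint>

    // addi/ld sign-extend their 16-bit displacement, so the "high-adjusted"
    // half must absorb the sign of the low half.
    static uint16_t ha16(uint32_t addr) { return (addr + 0x8000) >> 16; }
    static int16_t  lo16(uint32_t addr) { return (int16_t)(addr & 0xFFFF); }

    int main() {
      for (uint32_t a : {0x12345678u, 0x0000FFFFu, 0xDEADBEEFu}) {
        // lis rX, a@ha ; addi rX, rX, a@l  rebuilds the original address.
        uint32_t rebuilt = ((uint32_t)ha16(a) << 16) + (int32_t)lo16(a);
        assert(rebuilt == a);
      }
      return 0;
    }
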
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 4357bdc..84a15b1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -102,6 +102,19 @@ class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
bits<16> C;
bits<5> B;
@@ -112,6 +125,7 @@ class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{16-31} = C;
}
+
class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
@@ -147,8 +161,7 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
let A = 0;
- let B = 0;
- let C = 0;
+ let Addr = 0;
}
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -188,17 +201,31 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RST;
- bits<14> DS;
- bits<5> RA;
+ bits<19> DS_RA;
let Pattern = pattern;
let Inst{6-10} = RST;
- let Inst{11-15} = RA;
- let Inst{16-29} = DS;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
let Inst{30-31} = xo;
}
+class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
// 1.7.6 X-Form
class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
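
The reworked DForm_1 and DSForm_1 records above slice a single Addr / DS_RA operand into register and displacement fields (bits 11-15 base register, the rest displacement, with the DS-form keeping bits 30-31 for the extended opcode). As a cross-check on those field widths, here is a self-contained sketch, independent of TableGen, that packs the same layouts by hand; the expected words are the standard encodings of lwz r3, 8(r1) and ld r3, 8(r1).

    #include <cassert>
    #include <cstdint>

    // D-form:  opcode(6) | RT(5) | RA(5) | D(16)
    static uint32_t encodeDForm(unsigned op, unsigned rt, unsigned ra, int16_t d) {
      return (op << 26) | (rt << 21) | (ra << 16) | (uint16_t)d;
    }

    // DS-form: opcode(6) | RT(5) | RA(5) | DS(14) | XO(2); displacement = DS << 2.
    static uint32_t encodeDSForm(unsigned op, unsigned xo, unsigned rt, unsigned ra,
                                 int16_t disp) {
      assert((disp & 3) == 0 && "DS-form displacements are multiples of 4");
      return (op << 26) | (rt << 21) | (ra << 16) | ((uint16_t)disp & 0xFFFC) | xo;
    }

    int main() {
      assert(encodeDForm(32, 3, 1, 8)     == 0x80610008u);  // lwz r3, 8(r1)
      assert(encodeDSForm(58, 0, 3, 1, 8) == 0xE8610008u);  // ld  r3, 8(r1)
      return 0;
    }
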
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index c17108f..53b0491 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "PPCPredicates.h"
#include "PPCGenInstrInfo.inc"
#include "PPCTargetMachine.h"
+#include "PPCHazardRecognizers.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -39,7 +40,19 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
RI(*TM.getSubtargetImpl(), *this) {}
-unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*TII);
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
@@ -57,7 +70,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
default: break;
@@ -84,11 +97,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI)
return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
-
+
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
return 0;
-
+
// If we have a zero rotate count, we have:
// M = mask(MB,ME)
// Op0 = (Op1 & ~M) | (Op2 & M)
@@ -135,14 +148,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(1).setReg(Reg2);
MI->getOperand(2).setIsKill(Reg1IsKill);
MI->getOperand(1).setIsKill(Reg2IsKill);
-
+
// Swap the mask around.
MI->getOperand(4).setImm((ME+1) & 31);
MI->getOperand(5).setImm((MB-1) & 31);
return MI;
}
-void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
BuildMI(MBB, MI, DL, get(PPC::NOP));
@@ -169,7 +182,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Get the last instruction in the block.
MachineInstr *LastInst = I;
-
+
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (LastInst->getOpcode() == PPC::B) {
@@ -189,7 +202,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// Otherwise, don't know what this is.
return true;
}
-
+
// Get the instruction before it if it's a terminator.
MachineInstr *SecondLastInst = I;
@@ -197,9 +210,9 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
if (SecondLastInst && I != MBB.begin() &&
isUnpredicatedTerminator(--I))
return true;
-
+
// If the block ends with PPC::B and PPC:BCC, handle it.
- if (SecondLastInst->getOpcode() == PPC::BCC &&
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(2).isMBB() ||
!LastInst->getOperand(0).isMBB())
@@ -210,10 +223,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
-
+
// If the block ends with two PPC:Bs, handle it. The second one is not
// executed, so remove it.
- if (SecondLastInst->getOpcode() == PPC::B &&
+ if (SecondLastInst->getOpcode() == PPC::B &&
LastInst->getOpcode() == PPC::B) {
if (!SecondLastInst->getOperand(0).isMBB())
return true;
@@ -239,17 +252,17 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
return 0;
-
+
// Remove the branch.
I->eraseFromParent();
-
+
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
if (I->getOpcode() != PPC::BCC)
return 1;
-
+
// Remove the branch.
I->eraseFromParent();
return 2;
@@ -262,9 +275,9 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
-
+
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
@@ -274,7 +287,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
-
+
// Two-way Conditional Branch.
BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
@@ -377,11 +390,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
-
+
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
if (SrcReg != PPC::CR0) {
@@ -391,7 +404,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
-
+
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
@@ -428,14 +441,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
Reg = PPC::CR7;
- return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
} else if (RC == PPC::VRRCRegisterClass) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// STVX VAL, 0, R0
- //
+ //
// FIXME: We use R0 here, because it isn't available for RA.
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
FrameIdx, 0, 0));
@@ -469,8 +482,9 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore, /*Offset=*/0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
@@ -513,9 +527,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
// at the moment.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
ScratchReg), FrameIdx));
-
+
// If the reloaded register isn't CR0, shift the bits right so that they are
// in the right CR's slot.
if (DestReg != PPC::CR0) {
@@ -525,11 +539,11 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
.addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
.addImm(31));
}
-
+
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
.addReg(ScratchReg));
} else if (RC == PPC::CRBITRCRegisterClass) {
-
+
unsigned Reg = 0;
if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
@@ -556,14 +570,14 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
Reg = PPC::CR7;
- return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
PPC::CRRCRegisterClass, NewMIs);
} else if (RC == PPC::VRRCRegisterClass) {
// We don't have indexed addressing for vector loads. Emit:
// R0 = ADDI FI#
// Dest = LVX 0, R0
- //
+ //
// FIXME: We use R0 here, because it isn't available for RA.
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
FrameIdx, 0, 0));
@@ -590,8 +604,9 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad, /*Offset=*/0,
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index fc7b7b3..b5249ae 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -32,7 +32,7 @@ enum {
/// PPC970_First - This instruction starts a new dispatch group, so it will
/// always be the first one in the group.
PPC970_First = 0x1,
-
+
/// PPC970_Single - This instruction starts a new dispatch group and
/// terminates it, so it will be the sole instruction in the group.
PPC970_Single = 0x2,
@@ -40,7 +40,7 @@ enum {
/// PPC970_Cracked - This instruction is cracked into two pieces, requiring
/// two dispatch pipes to be available to issue.
PPC970_Cracked = 0x4,
-
+
/// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
/// an instruction is issued to.
PPC970_Shift = 3,
@@ -58,9 +58,9 @@ enum PPC970_Unit {
PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
PPC970_BRU = 7 << PPC970_Shift // Branch Unit
};
-}
-
-
+} // end namespace PPCII
+
+
class PPCInstrInfo : public TargetInstrInfoImpl {
PPCTargetMachine &TM;
const PPCRegisterInfo RI;
@@ -69,7 +69,7 @@ class PPCInstrInfo : public TargetInstrInfoImpl {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -82,6 +82,10 @@ public:
///
virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -90,8 +94,8 @@ public:
// commuteInstruction - We can commute rlwimi instructions, but only if the
// rotate amt is zero. We also have to munge the immediates a bit.
virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
-
- virtual void insertNoop(MachineBasicBlock &MBB,
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
@@ -109,7 +113,7 @@ public:
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
-
+
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -121,7 +125,7 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
-
+
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
uint64_t Offset,
@@ -130,7 +134,7 @@ public:
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index eb100ec..82aadeb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -68,17 +68,17 @@ def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
// This sequence is used for long double->int conversions. It changes the
// bits in the FPSCR which is not modelled.
def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
- [SDNPOutFlag]>;
+ [SDNPOutGlue]>;
def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInFlag, SDNPOutFlag]>;
+ [SDNPInGlue, SDNPOutGlue]>;
def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
- [SDNPInFlag, SDNPOutFlag]>;
+ [SDNPInGlue, SDNPOutGlue]>;
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
- [SDNPInFlag, SDNPOutFlag]>;
+ [SDNPInGlue, SDNPOutGlue]>;
def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
[SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
SDTCisVT<3, f64>]>,
- [SDNPInFlag]>;
+ [SDNPInGlue]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
@@ -105,45 +105,45 @@ def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
-def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad]>;
@@ -286,31 +286,38 @@ def u16imm : Operand<i32> {
def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
let PrintMethod = "printS16X4ImmOperand";
}
-def target : Operand<OtherVT> {
+def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
}
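+// Note: the EncoderMethod hooks named above ("getDirectBrEncoding",
+// "getCondBrEncoding", etc.) are implemented in PPCMCCodeEmitter.cpp and are
+// invoked from the TableGen-generated getBinaryCodeForInstr().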
def calltarget : Operand<iPTR> {
- let PrintMethod = "printCallOperand";
+ let EncoderMethod = "getDirectBrEncoding";
}
def aaddr : Operand<iPTR> {
let PrintMethod = "printAbsAddrOperand";
}
-def piclabel: Operand<iPTR> {
- let PrintMethod = "printPICLabel";
-}
+def piclabel: Operand<iPTR> {}
def symbolHi: Operand<i32> {
let PrintMethod = "printSymbolHi";
+ let EncoderMethod = "getHA16Encoding";
}
def symbolLo: Operand<i32> {
let PrintMethod = "printSymbolLo";
+ let EncoderMethod = "getLO16Encoding";
}
def crbitm: Operand<i8> {
let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
@@ -319,9 +326,9 @@ def memrr : Operand<iPTR> {
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
}
def tocentry : Operand<iPTR> {
- let PrintMethod = "printTOCEntryLabel";
let MIOperandInfo = (ops i32imm:$imm);
}
@@ -355,11 +362,9 @@ def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
- "${:comment} ADJCALLSTACKDOWN",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "",
[(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
- "${:comment} ADJCALLSTACKUP",
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -368,8 +373,7 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
- "${:comment} DYNALLOC $result, $negsize, $fpsi",
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "",
[(set GPRC:$result,
(PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
@@ -378,26 +382,26 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
let usesCustomInserter = 1, // Expanded after instruction selection.
PPC970_Single = 1 in {
def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
- i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ i32imm:$BROPC), "",
[]>;
def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
- i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ i32imm:$BROPC), "",
[]>;
def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
- i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ i32imm:$BROPC), "",
[]>;
def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
- i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ i32imm:$BROPC), "",
[]>;
def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
- i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ i32imm:$BROPC), "",
[]>;
}
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
- "${:comment} SPILL_CR $cond $F", []>;
+ "", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
@@ -409,12 +413,12 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
let Defs = [LR] in
- def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "", []>,
PPC970_Unit_BRU;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
let isBarrier = 1 in {
- def B : IForm<18, 0, 0, (outs), (ins target:$dst),
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
"b $dst", BrB,
[(br bb:$dst)]>;
}
@@ -422,7 +426,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
- def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst),
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc} ${cond:reg}, $dst"
/*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
}
@@ -548,105 +552,81 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
let usesCustomInserter = 1 in {
let Uses = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
- "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
- "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
[(set GPRC:$dst,
(atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
- "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
[(set GPRC:$dst,
(atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
- "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "",
[(set GPRC:$dst,
(atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
- "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
[(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
- "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
[(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
- "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "",
[(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
}
}
@@ -785,33 +765,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2 in {
-def STBU : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stbu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"sthu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stwu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
"stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
@@ -1120,9 +1100,16 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
// As it turns out, in all cases where we currently use this,
// we're only interested in one subregister of it. Represent this in the
// instruction to keep the register allocator from becoming confused.
+//
+// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
- "mfcr $rT ${:comment} $FXM", SprMFCR>,
+ "", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfcr $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
index daf4ec6..78383e0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -16,7 +16,7 @@
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
#include "llvm/Function.h"
-#include "llvm/System/Memory.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCAsmInfo.cpp
index 3644c79..d1178dd 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -17,10 +17,11 @@ using namespace llvm;
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
PCSymbol = ".";
CommentString = ";";
- ExceptionsType = ExceptionHandling::Dwarf;
+ ExceptionsType = ExceptionHandling::DwarfTable;
if (!is64Bit)
Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+
AssemblerDialect = 1; // New-Style mnemonics.
SupportsDebugInformation= true; // Debug information.
}
@@ -47,7 +48,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
// Exceptions handling
if (!is64Bit)
- ExceptionsType = ExceptionHandling::Dwarf;
+ ExceptionsType = ExceptionHandling::DwarfTable;
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
new file mode 100644
index 0000000..65c2c82
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -0,0 +1,195 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFixupKinds.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
+ const TargetMachine &TM;
+ MCContext &Ctx;
+
+public:
+ PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
+ : TM(tm), Ctx(ctx) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ unsigned getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Bits = getBinaryCodeForInstr(MI, Fixups);
+
+ // Output the constant in big endian byte order.
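+    // e.g. 0x38210010 (addi r1, r1, 16) is written as the bytes 38 21 00 10.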
+ for (unsigned i = 0; i != 4; ++i) {
+ OS << (char)(Bits >> 24);
+ Bits <<= 8;
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+ MCContext &Ctx) {
+ return new PPCMCCodeEmitter(TM, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
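+  // The encoded field stays zero here; the assembler/object writer later
+  // resolves the fixup and patches the branch-target bits in place.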
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the high-half (ha16) immediate field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_ha16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the low-half (lo16) immediate field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
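+  // e.g. 8(r1) (r1 is register number 1) encodes as (1 << 16) | 0x0008.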
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
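+  // e.g. a displacement field of 4 against r1 yields (1 << 14) | 0x4.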
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+  // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_lo14));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
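+  // With CR0..CR7 numbered 0..7, this yields the one-hot masks 0x80..0x01.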
+ return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 0000000..6082587
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,172 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ MCContext &Ctx = AP.OutContext;
+
+ SmallString<128> Name;
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Name += AP.MAI->getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ bool isImplicitlyPrivate = false;
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+ (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+ isImplicitlyPrivate = true;
+
+ AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ }
+
+  // If the target flags on the operand change the name of the symbol, do that
+  // before we return the symbol.
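+  // e.g. on Darwin a call to _foo is redirected through the lazy-binding
+  // stub _foo$stub.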
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
+ Name += "$stub";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ Name.erase(Name.end()-5, Name.end());
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
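+  // e.g. a reference to _foo is loaded through the indirection entry
+  // _foo$non_lazy_ptr rather than referencing _foo directly.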
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ Name += "$non_lazy_ptr";
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (StubSym.getPointer() == 0) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ if (MO.getTargetFlags() & PPCII::MO_LO16)
+ RefKind = MCSymbolRefExpr::VK_PPC_LO16;
+ else if (MO.getTargetFlags() & PPCII::MO_HA16)
+ RefKind = MCSymbolRefExpr::VK_PPC_HA16;
+
+ // FIXME: This isn't right, but we don't have a good way to express this in
+ // the MC Level, see below.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
+ RefKind = MCSymbolRefExpr::VK_None;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
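+  // i.e. the final operand expression is (symbol + offset) - <PIC base symbol>.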
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
+ // since it is not a symbol!
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ assert(0 && "unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 653e143..45d8b6b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -17,7 +17,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCRegisterInfo.h"
-#include "PPCFrameInfo.h"
+#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
@@ -31,7 +31,7 @@
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,16 +44,6 @@
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
-// FIXME This disables some code that aligns the stack to a boundary
-// bigger than the default (16 bytes on Darwin) when there is a stack local
-// of greater alignment. This does not currently work, because the delta
-// between old and new stack pointers is added to offsets that reference
-// incoming parameters after the prolog is generated, and the code that
-// does that doesn't handle a variable delta. You don't want to do that
-// anyway; a better approach is to reserve another register that retains
-// to the incoming stack pointer, and reference parameters relative to that.
-#define ALIGN_STACK 0
-
// FIXME (64-bit): Eventually enable by default.
namespace llvm {
cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
@@ -68,14 +58,11 @@ cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
using namespace llvm;
-#define EnableRegisterScavenging \
- ((EnablePPC32RS && !Subtarget.isPPC64()) || \
- (EnablePPC64RS && Subtarget.isPPC64()))
-
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return EnableRegisterScavenging;
+ return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
+ (EnablePPC64RS && Subtarget.isPPC64()));
}
/// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -269,26 +256,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
}
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Naked functions have no stack frame pushed, so we don't have a frame pointer.
- if (MF.getFunction()->hasFnAttr(Attribute::Naked))
- return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
-}
-
-static bool spillsCR(const MachineFunction &MF) {
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- return FuncInfo->isCRSpilled();
-}
-
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
Reserved.set(PPC::R0);
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
@@ -314,7 +286,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!EnableRegisterScavenging)
+ if (!requiresRegisterScavenging(MF))
Reserved.set(PPC::R0); // FIXME (64-bit): Remove
Reserved.set(PPC::X0);
@@ -334,7 +306,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- if (needsFP(MF))
+ if (PPCFI->needsFP(MF))
Reserved.set(PPC::R31);
return Reserved;
@@ -344,30 +316,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getStackSize() && needsFP(MF);
-}
-
-/// MustSaveLR - Return true if this function requires that we save the LR
-/// register onto the stack in the prolog and restore it in the epilog of the
-/// function.
-static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
- const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
-
- // We need a save/restore of LR if there is any def of LR (which is
- // defined by calls, including the PIC setup sequence), or if there is
- // some use of the LR stack slot (e.g. for builtin_return_address).
- // (LR comes in 32 and 64 bit versions.)
- MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
- return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
-}
-
-
-
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -447,7 +395,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
unsigned FrameSize = MFI->getStackSize();
// Get stack alignments.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
if (MaxAlign > TargetAlign)
report_fatal_error("Dynamic alloca with large aligns not supported");
@@ -464,7 +412,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// FIXME (64-bit): Use "findScratchRegister"
unsigned Reg;
- if (EnableRegisterScavenging)
+ if (requiresRegisterScavenging(MF))
Reg = findScratchRegister(II, RS, RC, SPAdj);
else
Reg = PPC::R0;
@@ -474,7 +422,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
.addImm(0)
.addReg(PPC::X1);
@@ -491,7 +439,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
.addReg(Reg, RegState::Kill)
.addReg(PPC::X1)
@@ -593,6 +541,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
// Find out which operand is the frame index.
@@ -625,14 +574,15 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Special case for pseudo-op SPILL_CR.
- if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
+ if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
- MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1,
+ MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
+ PPC::R31 : PPC::R1,
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -682,7 +632,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (EnableRegisterScavenging)
+ if (requiresRegisterScavenging(MF))
SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
else
SReg = PPC::R0;
@@ -715,898 +665,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
}
-/// VRRegNo - Map from a numbered VR register to its enum value.
-///
-static const unsigned short VRRegNo[] = {
- PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
- PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
-/// RemoveVRSaveCode - We have found that this function does not need any code
-/// to manipulate the VRSAVE register, even though it uses vector registers.
-/// This can happen when the only registers used are known to be live in or out
-/// of the function. Remove all of the VRSAVE related code from the function.
-static void RemoveVRSaveCode(MachineInstr *MI) {
- MachineBasicBlock *Entry = MI->getParent();
- MachineFunction *MF = Entry->getParent();
-
- // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
- MachineBasicBlock::iterator MBBI = MI;
- ++MBBI;
- assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
- MBBI->eraseFromParent();
-
- bool RemovedAllMTVRSAVEs = true;
- // See if we can find and remove the MTVRSAVE instruction from all of the
- // epilog blocks.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
- // If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
- bool FoundIt = false;
- for (MBBI = I->end(); MBBI != I->begin(); ) {
- --MBBI;
- if (MBBI->getOpcode() == PPC::MTVRSAVE) {
- MBBI->eraseFromParent(); // remove it.
- FoundIt = true;
- break;
- }
- }
- RemovedAllMTVRSAVEs &= FoundIt;
- }
- }
-
- // If we found and removed all MTVRSAVE instructions, remove the read of
- // VRSAVE as well.
- if (RemovedAllMTVRSAVEs) {
- MBBI = MI;
- assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
- --MBBI;
- assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
- MBBI->eraseFromParent();
- }
-
- // Finally, nuke the UPDATE_VRSAVE.
- MI->eraseFromParent();
-}
-
-// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
-// instruction selector. Based on the vector registers that have been used,
-// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
- MachineFunction *MF = MI->getParent()->getParent();
- DebugLoc dl = MI->getDebugLoc();
-
- unsigned UsedRegMask = 0;
- for (unsigned i = 0; i != 32; ++i)
- if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
- UsedRegMask |= 1 << (31-i);
-
- // Live in and live out values already must be in the mask, so don't bother
- // marking them.
- for (MachineRegisterInfo::livein_iterator
- I = MF->getRegInfo().livein_begin(),
- E = MF->getRegInfo().livein_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
- if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
- }
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
- if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
- }
-
- // If no registers are used, turn this into a copy.
- if (UsedRegMask == 0) {
- // Remove all VRSAVE code.
- RemoveVRSaveCode(MI);
- return;
- }
-
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned DstReg = MI->getOperand(0).getReg();
-
- if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask);
- else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask);
- } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
- } else {
- if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
-
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(UsedRegMask & 0xFFFF);
- }
-
- // Remove the old UPDATE_VRSAVE instruction.
- MI->eraseFromParent();
-}
-
-/// determineFrameLayout - Determine the size of the frame and maximum call
-/// frame size.
-void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
-
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned MaxAlign = MFI->getMaxAlignment();
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned AlignMask = TargetAlign - 1; //
-
- // If we are a leaf function, and use up to 224 bytes of stack space,
- // don't have a frame pointer, calls, or dynamic alloca then we do not need
- // to adjust the stack pointer (we fit in the Red Zone).
- bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
- // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
- if (!DisableRedZone &&
- FrameSize <= 224 && // Fits in red zone.
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
- // No need for frame
- MFI->setStackSize(0);
- return;
- }
-
- // Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
-
- // Maximum call frame needs to be at least big enough for linkage and 8 args.
- unsigned minCallFrameSize =
- PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
- maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
-
- // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
- // that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
- maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
-
- // Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
-
- // Include call frame size in total.
- FrameSize += maxCallFrameSize;
-
- // Make sure the frame is aligned.
- FrameSize = (FrameSize + AlignMask) & ~AlignMask;
-
- // Update frame info.
- MFI->setStackSize(FrameSize);
-}
-
-void
-PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // Save and clear the LR state.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- unsigned LR = getRARegister();
- FI->setMustSaveLR(MustSaveLR(MF, LR));
- MF.getRegInfo().setPhysRegUnused(LR);
-
- // Save R31 if necessary
- int FPSI = FI->getFramePointerSaveIndex();
- bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // If the frame pointer save index hasn't been defined yet.
- if (!FPSI && needsFP(MF)) {
- // Find out what the fix offset of the frame pointer save area.
- int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
- isDarwinABI);
- // Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
- // Save the result.
- FI->setFramePointerSaveIndex(FPSI);
- }
-
- // Reserve stack space to move the linkage area to in case of a tail call.
- int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
- }
-
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (EnableRegisterScavenging) // FIXME (64-bit): Enable.
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
-}
-
-void
-PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- return;
- }
-
- // Get callee saved register information.
- MachineFrameInfo *FFI = MF.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
-
- // Early exit if no callee saved registers are modified!
- if (CSI.empty() && !needsFP(MF)) {
- return;
- }
-
- unsigned MinGPR = PPC::R31;
- unsigned MinG8R = PPC::X31;
- unsigned MinFPR = PPC::F31;
- unsigned MinVR = PPC::V31;
-
- bool HasGPSaveArea = false;
- bool HasG8SaveArea = false;
- bool HasFPSaveArea = false;
- bool HasCRSaveArea = false;
- bool HasVRSAVESaveArea = false;
- bool HasVRSaveArea = false;
-
- SmallVector<CalleeSavedInfo, 18> GPRegs;
- SmallVector<CalleeSavedInfo, 18> G8Regs;
- SmallVector<CalleeSavedInfo, 18> FPRegs;
- SmallVector<CalleeSavedInfo, 18> VRegs;
-
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (PPC::GPRCRegisterClass->contains(Reg)) {
- HasGPSaveArea = true;
-
- GPRegs.push_back(CSI[i]);
-
- if (Reg < MinGPR) {
- MinGPR = Reg;
- }
- } else if (PPC::G8RCRegisterClass->contains(Reg)) {
- HasG8SaveArea = true;
-
- G8Regs.push_back(CSI[i]);
-
- if (Reg < MinG8R) {
- MinG8R = Reg;
- }
- } else if (PPC::F8RCRegisterClass->contains(Reg)) {
- HasFPSaveArea = true;
-
- FPRegs.push_back(CSI[i]);
-
- if (Reg < MinFPR) {
- MinFPR = Reg;
- }
-// FIXME SVR4: Disable CR save area for now.
- } else if (PPC::CRBITRCRegisterClass->contains(Reg)
- || PPC::CRRCRegisterClass->contains(Reg)) {
-// HasCRSaveArea = true;
- } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
- HasVRSAVESaveArea = true;
- } else if (PPC::VRRCRegisterClass->contains(Reg)) {
- HasVRSaveArea = true;
-
- VRegs.push_back(CSI[i]);
-
- if (Reg < MinVR) {
- MinVR = Reg;
- }
- } else {
- llvm_unreachable("Unknown RegisterClass!");
- }
- }
-
- PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
-
- int64_t LowerBound = 0;
-
- // Take into account stack space reserved for tail calls.
- int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
- LowerBound = TCSPDelta;
- }
-
- // The Floating-point register save area is right below the back chain word
- // of the previous stack frame.
- if (HasFPSaveArea) {
- for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
- int FI = FPRegs[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
-
- LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8;
- }
-
- // Check whether the frame pointer register is allocated. If so, make sure it
- // is spilled to the correct offset.
- if (needsFP(MF)) {
- HasGPSaveArea = true;
-
- int FI = PFI->getFramePointerSaveIndex();
- assert(FI && "No Frame Pointer Save Slot!");
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
-
- // General register save area starts right below the Floating-point
- // register save area.
- if (HasGPSaveArea || HasG8SaveArea) {
- // Move general register save area spill slots down, taking into account
- // the size of the Floating-point register save area.
- for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
- int FI = GPRegs[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
-
- // Move the 64-bit (G8) register save area spill slots down as well, taking
- // into account the size of the Floating-point register save area.
- for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
- int FI = G8Regs[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
-
- unsigned MinReg = std::min<unsigned>(getRegisterNumbering(MinGPR),
- getRegisterNumbering(MinG8R));
-
- if (Subtarget.isPPC64()) {
- LowerBound -= (31 - MinReg + 1) * 8;
- } else {
- LowerBound -= (31 - MinReg + 1) * 4;
- }
- }
-
- // The CR save area is below the general register save area.
- if (HasCRSaveArea) {
- // FIXME SVR4: Is it actually possible to have multiple elements in CSI
- // which have the CR/CRBIT register class?
- // Adjust the frame index of the CR spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if (PPC::CRBITRCRegisterClass->contains(Reg) ||
- PPC::CRRCRegisterClass->contains(Reg)) {
- int FI = CSI[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
- }
-
- LowerBound -= 4; // The CR save area is always 4 bytes long.
- }
-
- if (HasVRSAVESaveArea) {
- // FIXME SVR4: Is it actually possible to have multiple elements in CSI
- // which have the VRSAVE register class?
- // Adjust the frame index of the VRSAVE spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
- int FI = CSI[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
- }
-
- LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
- }
-
- if (HasVRSaveArea) {
- // Insert alignment padding; we need 16-byte alignment.
- LowerBound = (LowerBound - 15) & ~(15);
-
- for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
- int FI = VRegs[i].getFrameIdx();
-
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
- }
- }
-}
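
To see the arithmetic the removed SVR4 layout code performs, the sketch below walks LowerBound downward through two save areas the same way: each area holds every register from the lowest saved one up to register 31, at 8 bytes per FPR and 4 bytes per 32-bit GPR. The helper and the register numbers are illustrative assumptions, not from the patch:

#include <cassert>

// Size of the FPR save area when MinFPRNumber..31 must be saved,
// mirroring (31 - getRegisterNumbering(MinFPR) + 1) * 8 above.
static long fpSaveAreaSize(unsigned MinFPRNumber) {
  return (31 - MinFPRNumber + 1) * 8;  // F-registers are 8 bytes each
}

int main() {
  long LowerBound = 0;
  LowerBound -= fpSaveAreaSize(28);    // saving F28..F31 -> 32 bytes
  assert(LowerBound == -32);
  LowerBound -= (31 - 29 + 1) * 4;     // 32-bit GPRs R29..R31 -> 12 bytes more
  assert(LowerBound == -44);
  return 0;
}
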
-
-void
-PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- DebugLoc dl;
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- !MF.getFunction()->doesNotThrow() ||
- UnwindTablesMandatory;
-
- // Prepare for frame info.
- MCSymbol *FrameLabel = 0;
-
- // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
- // process it.
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- HandleVRSaveUpdate(MBBI, TII);
- break;
- }
- }
-
- // Move MBBI back to the beginning of the function.
- MBBI = MBB.begin();
-
- // Work out frame sizes.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
- int NegFrameSize = -FrameSize;
-
- // Get processor type.
- bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
- bool isDarwinABI = Subtarget.isDarwinABI();
- // Check if the link register (LR) must be saved.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- bool MustSaveLR = FI->mustSaveLR();
- // Do we have a frame pointer for this function?
- bool HasFP = hasFP(MF) && FrameSize;
-
- int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
-
- int FPOffset = 0;
- if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = FFI->getObjectOffset(FPIndex);
- } else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
- }
- }
-
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
-
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X31)
- .addImm(FPOffset/4)
- .addReg(PPC::X1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X0)
- .addImm(LROffset / 4)
- .addReg(PPC::X1);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
-
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R31)
- .addImm(FPOffset)
- .addReg(PPC::R1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R0)
- .addImm(LROffset)
- .addReg(PPC::R1);
- }
-
- // Skip if a leaf routine.
- if (!FrameSize) return;
-
- // Get stack alignments.
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
-
- // Adjust stack pointer: r1 += NegFrameSize.
- // If there is a preferred stack alignment, align R1 now
- if (!isPPC64) {
- // PPC32.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R1)
- .addImm(0)
- .addImm(32 - Log2_32(MaxAlign))
- .addImm(31);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
- .addReg(PPC::R1)
- .addImm(NegFrameSize)
- .addReg(PPC::R1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- }
- } else { // PPC64.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
- .addReg(PPC::X1)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
- .addReg(PPC::X0)
- .addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
- .addReg(PPC::X1)
- .addImm(NegFrameSize / 4)
- .addReg(PPC::X1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- }
- }
-
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
-
- // Add the "machine moves" for the instructions we generated above, but in
- // reverse order.
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
-
- // Show update of SP.
- if (NegFrameSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- } else {
- MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
- Moves.push_back(MachineMove(FrameLabel, SP, SP));
- }
-
- if (HasFP) {
- MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
- MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
- }
-
- if (MustSaveLR) {
- MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
- Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
- }
- }
-
- MCSymbol *ReadyLabel = 0;
-
- // If there is a frame pointer, copy R1 into R31
- if (HasFP) {
- if (!isPPC64) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
- }
-
- if (needsFrameMoves) {
- ReadyLabel = MMI.getContext().CreateTempSymbol();
-
- // Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
-
- MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
- (isPPC64 ? PPC::X1 : PPC::R1));
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
- }
- }
-
- if (needsFrameMoves) {
- MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(Label, CSDst, CSSrc));
- }
- }
-}
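
When NegFrameSize does not fit in a signed 16-bit immediate, the prologue above materializes it in r0 with a LIS/ORI pair: LIS places the arithmetically shifted high half in the upper 16 bits and ORI merges in the zero-extended low half. A small self-contained check of that split (function name and test values are illustrative):

#include <cassert>
#include <cstdint>

// Rebuild a 32-bit constant from the two 16-bit pieces that the LIS/ORI
// sequence in the prologue feeds into r0.
static int32_t materializeViaLisOri(int32_t Value) {
  int32_t Hi = Value >> 16;                  // LIS r0, Hi   -> r0 = Hi << 16
  uint32_t Lo = (uint32_t)Value & 0xFFFFu;   // ORI r0, r0, Lo (low half, zero-extended)
  return (int32_t)(((uint32_t)Hi << 16) | Lo);
}

int main() {
  assert(materializeViaLisOri(-70000) == -70000); // e.g. a frame larger than 32 KiB
  assert(materializeViaLisOri(123456) == 123456);
  return 0;
}
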
-
-void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl;
-
- assert( (RetOpcode == PPC::BLR ||
- RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8) &&
- "Can only insert epilog into returning blocks");
-
- // Get alignment info so we know how to restore r1
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
-
- // Get the number of bytes allocated from the FrameInfo.
- int FrameSize = MFI->getStackSize();
-
- // Get processor type.
- bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
- bool isDarwinABI = Subtarget.isDarwinABI();
- // Check if the link register (LR) has been saved.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- bool MustSaveLR = FI->mustSaveLR();
- // Do we have a frame pointer for this function?
- bool HasFP = hasFP(MF) && FrameSize;
-
- int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
-
- int FPOffset = 0;
- if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = FFI->getObjectOffset(FPIndex);
- } else {
- FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
- }
- }
-
- bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8;
-
- if (UsesTCRet) {
- int MaxTCRetDelta = FI->getTailCallSPDelta();
- MachineOperand &StackAdjust = MBBI->getOperand(1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int Delta = StackAdj - MaxTCRetDelta;
- assert((Delta >= 0) && "Delta must be positive");
- if (MaxTCRetDelta>0)
- FrameSize += (StackAdj +Delta);
- else
- FrameSize += StackAdj;
- }
-
- if (FrameSize) {
- // The loaded (or persistent) stack pointer value is offset by the 'stwu'
- // on entry to the function. Add this offset back now.
- if (!isPPC64) {
- // If this function contained a fastcc call and GuaranteedTailCallOpt is
- // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
- // call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
- .addReg(PPC::R1)
- .addReg(PPC::R31)
- .addReg(PPC::R0);
- } else if (isInt<16>(FrameSize) &&
- (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R1).addImm(FrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
- .addImm(0).addReg(PPC::R1);
- }
- } else {
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
- .addReg(PPC::X1)
- .addReg(PPC::X31)
- .addReg(PPC::X0);
- } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X1).addImm(FrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
- .addImm(0).addReg(PPC::X1);
- }
- }
- }
-
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(LROffset/4).addReg(PPC::X1);
-
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
- .addImm(FPOffset/4).addReg(PPC::X1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
- .addImm(LROffset).addReg(PPC::R1);
-
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
- .addImm(FPOffset).addReg(PPC::R1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
- }
-
- // Callee pop calling convention. Pop parameter/linkage area. Used for tail
- // call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
- MF.getFunction()->getCallingConv() == CallingConv::Fast) {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
-
- if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg).addImm(CallerAllocatedAmt);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
- .addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
- .addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
- .addReg(FPReg)
- .addReg(TmpReg);
- }
- } else if (RetOpcode == PPC::TCRETURNdi) {
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri) {
- MBBI = prior(MBB.end());
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
- } else if (RetOpcode == PPC::TCRETURNai) {
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
- } else if (RetOpcode == PPC::TCRETURNdi8) {
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri8) {
- MBBI = prior(MBB.end());
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
- } else if (RetOpcode == PPC::TCRETURNai8) {
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
- }
-}
-
unsigned PPCRegisterInfo::getRARegister() const {
return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
if (!Subtarget.isPPC64())
- return hasFP(MF) ? PPC::R31 : PPC::R1;
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
- return hasFP(MF) ? PPC::X31 : PPC::X1;
-}
-
-void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(PPC::R1, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getEHExceptionRegister() const {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 890b24b..aa29ffe 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -44,17 +44,10 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// targetHandlesStackFrameRounding - Returns true if the target is
- /// responsible for rounding up the stack frame (probably at emitPrologue
- /// time).
- bool targetHandlesStackFrameRounding() const { return true; }
-
/// requiresRegisterScavenging - We require a register scavenger.
/// FIXME (64-bit): Should be inlined.
bool requiresRegisterScavenging(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -66,21 +59,9 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- /// determineFrameLayout - Determine the size of the frame and maximum call
- /// frame size.
- void determineFrameLayout(MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8604f54..2639165 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -300,13 +300,14 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
// R31 when the FP is not needed.
// When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area
// pointer.
- const PPCSubtarget &Subtarget
- = MF.getTarget().getSubtarget<PPCSubtarget>();
-
+ const PPCSubtarget &Subtarget = MF.getTarget().getSubtarget<PPCSubtarget>();
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
if (Subtarget.isPPC64() || Subtarget.isSVR4ABI())
return end()-5; // don't allocate R13, R31, R0, R1, LR
- if (needsFP(MF))
+ if (PPCFI->needsFP(MF))
return end()-4; // don't allocate R31, R0, R1, LR
else
return end()-3; // don't allocate R0, R1, LR
@@ -331,7 +332,9 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
}
G8RCClass::iterator
G8RCClass::allocation_order_end(const MachineFunction &MF) const {
- if (needsFP(MF))
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ if (PPCFI->needsFP(MF))
return end()-5;
else
return end()-4;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
index 7344763..ad4da1f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -13,7 +13,7 @@
def G3Itineraries : ProcessorItineraries<
- [IU1, IU2, FPU1, BPU, SRU, SLU], [
+ [IU1, IU2, FPU1, BPU, SRU, SLU], [], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
index 7efc693..03c3b29 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
def G4Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [
+ [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 15056c0..00cac3c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -15,7 +15,7 @@ def IU3 : FuncUnit; // integer unit 3 (7450 simple)
def IU4 : FuncUnit; // integer unit 4 (7450 simple)
def G4PlusItineraries : ProcessorItineraries<
- [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [
+ [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [
InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
index 2dffc48..1671f22 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
def G5Itineraries : ProcessorItineraries<
- [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [
+ [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [
InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 5d46065..72a1dee 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -129,7 +129,7 @@ void PPCSubtarget::SetJITMode() {
/// is required to get the address of the global.
bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const {
- // We never hae stubs if HasLazyResolverStubs=false or if in static mode.
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
return false;
// If symbol visibility is hidden, the extra load is not needed if
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 10cd10b..212b450 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -15,6 +15,7 @@
#include "PPCMCAsmInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/PassManager.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
@@ -29,6 +30,21 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ switch (Triple(TT).getOS()) {
+ case Triple::Darwin:
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+ default:
+ return NULL;
+ }
+}
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
@@ -36,6 +52,19 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer);
}
@@ -44,7 +73,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
- FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit),
+ FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index 626ddbb..2d24989 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,7 +14,7 @@
#ifndef PPC_TARGETMACHINE_H
#define PPC_TARGETMACHINE_H
-#include "PPCFrameInfo.h"
+#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "PPCJITInfo.h"
#include "PPCInstrInfo.h"
@@ -33,7 +33,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
PPCInstrInfo InstrInfo;
- PPCFrameInfo FrameInfo;
+ PPCFrameLowering FrameLowering;
PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
@@ -43,23 +43,25 @@ public:
PPCTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit);
- virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
virtual const PPCTargetLowering *getTargetLowering() const {
return &TLInfo;
}
virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
- virtual const PPCRegisterInfo *getRegisterInfo() const {
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData getInstrItineraryData() const {
- return InstrItins;
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
}
// Pass Pipeline Configuration
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index aae5da8..ee29275 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -7,21 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a simple local pass that fills delay slots with NOPs.
-//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "delayslotfiller"
+#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+
using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
+static cl::opt<bool> DisableDelaySlotFiller(
+ "disable-sparc-delay-filler",
+ cl::init(false),
+ cl::desc("Disable the Sparc delay slot filler."),
+ cl::Hidden);
+
namespace {
struct Filler : public MachineFunctionPass {
/// Target machine description which we query for reg. names, data
@@ -47,6 +58,28 @@ namespace {
return Changed;
}
+ bool isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate);
+
+ void insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses);
+
+ void insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses);
+
+ bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+ unsigned Reg);
+
+ bool delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad, bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses);
+
+ MachineBasicBlock::iterator
+ findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+
};
char Filler::ID = 0;
} // end of anonymous namespace
@@ -59,18 +92,201 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
}
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
-/// Currently, we fill delay slots with NOPs. We assume there is only one
-/// delay slot per delayed instruction.
+/// We assume there is only one delay slot per delayed instruction.
///
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- ++J;
- BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP));
+
+ if (!DisableDelaySlotFiller)
+ D = findDelayInstr(MBB, I);
+
++FilledSlots;
Changed = true;
+
+ if (D == MBB.end())
+ BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+ else
+ MBB.splice(++J, &MBB, D);
}
return Changed;
}
+
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator slot)
+{
+ SmallSet<unsigned, 32> RegDefs;
+ SmallSet<unsigned, 32> RegUses;
+ bool sawLoad = false;
+ bool sawStore = false;
+
+ MachineBasicBlock::iterator I = slot;
+
+ if (slot->getOpcode() == SP::RET)
+ return MBB.end();
+
+ if (slot->getOpcode() == SP::RETL) {
+ --I;
+ if (I->getOpcode() != SP::RESTORErr)
+ return MBB.end();
+ // Change RETL to RET.
+ slot->setDesc(TII->get(SP::RET));
+ return I;
+ }
+
+ // The instruction chosen to fill a call's delay slot must not define any of the call's uses.
+ if (slot->getDesc().isCall())
+ insertCallUses(slot, RegUses);
+ else
+ insertDefsUses(slot, RegDefs, RegUses);
+
+ bool done = false;
+
+ while (!done) {
+ done = (I == MBB.begin());
+
+ if (!done)
+ --I;
+
+ // skip debug value
+ if (I->isDebugValue())
+ continue;
+
+
+ if (I->hasUnmodeledSideEffects()
+ || I->isInlineAsm()
+ || I->isLabel()
+ || I->getDesc().hasDelaySlot()
+ || isDelayFiller(MBB, I))
+ break;
+
+ if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+ insertDefsUses(I, RegDefs, RegUses);
+ continue;
+ }
+
+ return I;
+ }
+ return MBB.end();
+}
+
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+ bool &sawLoad,
+ bool &sawStore,
+ SmallSet<unsigned, 32> &RegDefs,
+ SmallSet<unsigned, 32> &RegUses)
+{
+
+ if (candidate->isImplicitDef() || candidate->isKill())
+ return true;
+
+ if (candidate->getDesc().mayLoad()) {
+ sawLoad = true;
+ if (sawStore)
+ return true;
+ }
+
+ if (candidate->getDesc().mayStore()) {
+ if (sawStore)
+ return true;
+ sawStore = true;
+ if (sawLoad)
+ return true;
+ }
+
+ for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
+ const MachineOperand &MO = candidate->getOperand(i);
+ if (!MO.isReg())
+ continue; // skip
+
+ unsigned Reg = MO.getReg();
+
+ if (MO.isDef()) {
+ //check whether Reg is defined or used before delay slot.
+ if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg))
+ return true;
+ }
+ if (MO.isUse()) {
+ //check whether Reg is defined before delay slot.
+ if (IsRegInSet(RegDefs, Reg))
+ return true;
+ }
+ }
+ return false;
+}
+
+
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegUses)
+{
+
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown opcode.");
+ case SP::CALL: break;
+ case SP::JMPLrr:
+ case SP::JMPLri:
+ assert(MI->getNumOperands() >= 2);
+ const MachineOperand &Reg = MI->getOperand(0);
+ assert(Reg.isReg() && "JMPL first operand is not a register.");
+ assert(Reg.isUse() && "JMPL first operand is not a use.");
+ RegUses.insert(Reg.getReg());
+
+ const MachineOperand &RegOrImm = MI->getOperand(1);
+ if (RegOrImm.isImm())
+ break;
+ assert(RegOrImm.isReg() && "JMPLrr second operand is not a register.");
+ assert(RegOrImm.isUse() && "JMPLrr second operand is not a use.");
+ RegUses.insert(RegOrImm.getReg());
+ break;
+ }
+}
+
+//Insert Defs and Uses of MI into the sets RegDefs and RegUses.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+ SmallSet<unsigned, 32>& RegDefs,
+ SmallSet<unsigned, 32>& RegUses)
+{
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (MO.isDef())
+ RegDefs.insert(Reg);
+ if (MO.isUse())
+ RegUses.insert(Reg);
+
+ }
+}
+
+// Returns true if Reg or any of its aliases is in RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+ if (RegSet.count(Reg))
+ return true;
+ // check Aliased Registers
+ for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ *Alias; ++ Alias)
+ if (RegSet.count(*Alias))
+ return true;
+
+ return false;
+}
+
+// return true if the candidate is a delay filler.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator candidate)
+{
+ if (candidate == MBB.begin())
+ return false;
+ const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
+ return prevdesc.hasDelaySlot();
+}
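
The new filler above scans backwards from the delayed instruction for something safe to move into its delay slot. The sketch below models that scan with plain containers; the Inst type, the barrier test, and the hazard rules are a simplified approximation of findDelayInstr/delayHasHazard (load/store ordering and call-specific handling omitted), not LLVM's actual data structures:

#include <set>
#include <vector>

struct Inst {
  std::set<unsigned> Defs, Uses;
  bool IsBarrier = false;  // side effects, inline asm, label, another delay slot...
};

// Walk backwards from the delayed instruction (Slot) and return the index of
// the first earlier instruction whose defs and uses do not conflict with the
// registers the delayed instruction needs; -1 means fall back to a NOP.
static int findDelayFiller(const std::vector<Inst> &Block, int Slot) {
  std::set<unsigned> RegDefs = Block[Slot].Defs;
  std::set<unsigned> RegUses = Block[Slot].Uses;
  for (int I = Slot - 1; I >= 0; --I) {
    const Inst &C = Block[I];
    if (C.IsBarrier)
      return -1;                               // cannot hoist anything past this point
    bool Hazard = false;
    for (unsigned R : C.Defs)                  // defining something the branch needs is a hazard
      Hazard |= (RegDefs.count(R) || RegUses.count(R));
    for (unsigned R : C.Uses)                  // using something redefined below is a hazard
      Hazard |= (RegDefs.count(R) != 0);
    if (!Hazard)
      return I;                                // this instruction can be spliced into the slot
    RegDefs.insert(C.Defs.begin(), C.Defs.end());
    RegUses.insert(C.Uses.begin(), C.Uses.end());
  }
  return -1;
}

int main() {
  // A tiny block: add r3 = r1 + r2, followed by a branch that uses r4.
  Inst Add; Add.Defs = {3}; Add.Uses = {1, 2};
  Inst Branch; Branch.Uses = {4};
  std::vector<Inst> Block = {Add, Branch};
  return findDelayFiller(Block, 1) == 0 ? 0 : 1;  // the add can fill the slot
}
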
diff --git a/contrib/llvm/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index ab948bb..edde842 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -145,6 +145,8 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
case MachineOperand::MO_Register:
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
"Operand is not a physical register ");
+ assert(MO.getReg() != SP::O7 &&
+ "%o7 is assigned as destination for getpcx!");
operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
break;
}
@@ -156,8 +158,8 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
O << "\t sethi\t"
- << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "
- << operand << '\n' ;
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum
+ << ")), " << operand << '\n' ;
O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
O << "\tor\t" << operand
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
index 33ecfdf..856f87a 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -24,9 +24,13 @@ def RetCC_Sparc32 : CallingConv<[
// Sparc 32-bit C Calling convention.
def CC_Sparc32 : CallingConv<[
- // All arguments get passed in integer registers if there is space.
- CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-
+ //Custom assign SRet to [sp+64].
+ CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+ // i32 and f32 arguments are passed in integer registers if there is space.
+ CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ // f64 arguments are split and passed through registers or through stack.
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 0000000..320c8ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // OR in the low bits of the offset: G1 = G1 | (NumBytes & 0x3FF)
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
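
The prologue added above always reserves the 92-byte V8 minimum frame on top of the locals, rounds the total up to a doubleword, and negates it because the stack grows down. A quick standalone check of that computation (the helper name and sample sizes are illustrative):

#include <cassert>

// Mirror of the stack adjustment in SparcFrameLowering::emitPrologue:
// locals + 92-byte fixed area, rounded up to 8 bytes, then negated.
static int sparcFrameAdjustment(int LocalBytes) {
  int NumBytes = LocalBytes + 92;   // register window spill + aggregate ptr + 6 arg words
  NumBytes = (NumBytes + 7) & ~7;   // doubleword-align as the ABI requires
  return -NumBytes;
}

int main() {
  assert(sparcFrameAdjustment(0) == -96);    // even an empty frame reserves 96 bytes
  assert(sparcFrameAdjustment(20) == -112);
  return 0;
}
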
+
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 0000000..9a2ddc8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+ const SparcSubtarget &STI;
+public:
+ explicit SparcFrameLowering(const SparcSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 4ea94c4..8c6103d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -44,9 +44,8 @@ public:
SDNode *Select(SDNode *N);
// Complex Pattern Selectors.
- bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2);
- bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Offset);
+ bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -71,7 +70,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -112,8 +111,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
return true;
}
-bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
- SDValue &R1, SDValue &R2) {
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() == ISD::FrameIndex) return false;
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress)
@@ -160,7 +158,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
} else {
TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
}
- TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+ TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
CurDAG->getRegister(SP::G0, MVT::i32)), 0);
// FIXME: Handle div by immediate.
@@ -174,7 +172,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDValue MulLHS = N->getOperand(0);
SDValue MulRHS = N->getOperand(1);
unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
- SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+ SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
MulLHS, MulRHS);
// The high part is in the Y register.
return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
@@ -196,8 +194,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
switch (ConstraintCode) {
default: return true;
case 'm': // memory
- if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1))
- SelectADDRri(Op.getNode(), Op, Op0, Op1);
+ if (!SelectADDRrr(Op, Op0, Op1))
+ SelectADDRri(Op, Op0, Op1);
break;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4099a62..196b87d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,3 +1,4 @@
+
//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
@@ -32,6 +33,47 @@ using namespace llvm;
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ assert (ArgFlags.isSRet());
+
+ //Assign SRet argument
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ 0,
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const unsigned RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ //Try to get first reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ } else {
+ // No register left; assign the whole f64 to an 8-byte stack slot.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8,4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ //Try to get second reg
+ if (unsigned Reg = State.AllocateReg(RegList, 6))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4,4),
+ LocVT, LocInfo));
+ return true;
+}
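
CC_Sparc_Assign_f64 above hands out locations for the two 32-bit halves of an f64 independently: each half takes the next free register from I0..I5; if no register is left for the first half the whole value gets one 8-byte stack slot, and a stranded second half gets its own 4-byte slot. The sketch below imitates that policy with plain types; Loc and the offsets are illustrative stand-ins, not CCValAssign:

#include <cassert>
#include <vector>

struct Loc { bool InReg; unsigned RegOrOffset; };

// NextReg counts how many of I0..I5 are already taken; NextStack is the next
// free outgoing stack offset.
static std::vector<Loc> assignF64(unsigned &NextReg, unsigned &NextStack) {
  std::vector<Loc> Halves;
  if (NextReg < 6) {
    Halves.push_back({true, NextReg++});       // high half in a register
  } else {
    Halves.push_back({false, NextStack});      // no registers left: the whole
    NextStack += 8;                            // f64 takes one 8-byte slot
    return Halves;
  }
  if (NextReg < 6) {
    Halves.push_back({true, NextReg++});       // low half in the next register
  } else {
    Halves.push_back({false, NextStack});      // low half spills to a 4-byte slot
    NextStack += 4;
  }
  return Halves;
}

int main() {
  unsigned NextReg = 5, NextStack = 0;          // only I5 is still free
  std::vector<Loc> L = assignF64(NextReg, NextStack);
  assert(L[0].InReg && L[0].RegOrOffset == 5);  // high half lands in I5
  assert(!L[1].InReg && L[1].RegOrOffset == 0); // low half goes to the stack
  return 0;
}
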
+
#include "SparcGenCallingConv.inc"
SDValue
@@ -41,6 +83,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
@@ -53,10 +97,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ if (MF.getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
SDValue Flag;
@@ -66,12 +110,24 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
}
+ // If the function returns a struct, copy the SRetReturnReg to I0
+ if (MF.getFunction()->hasStructRetAttr()) {
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ Flag = Chain.getValue(1);
+ if (MF.getRegInfo().liveout_empty())
+ MF.getRegInfo().addLiveOut(SP::I0);
+ }
if (Flag.getNode())
return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
@@ -100,135 +156,159 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
- static const unsigned ArgRegs[] = {
- SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
- };
- const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
- unsigned ArgOffset = 68;
+ const unsigned StackOffset = 92;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue ArgValue;
CCValAssign &VA = ArgLocs[i];
- // FIXME: We ignore the register assignments of AnalyzeFormalArguments
- // because it doesn't know how to split a double into two i32 registers.
- EVT ObjectVT = VA.getValVT();
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- if (!Ins[i].Used) { // Argument is dead.
- if (CurArgReg < ArgRegEnd) ++CurArgReg;
- InVals.push_back(DAG.getUNDEF(ObjectVT));
- } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
- unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
- MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
- SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- if (ObjectVT != MVT::i32) {
- unsigned AssertOp = ISD::AssertSext;
- Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
- DAG.getValueType(ObjectVT));
- Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
- }
- InVals.push_back(Arg);
- } else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true);
- SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- SDValue Load;
- if (ObjectVT == MVT::i32) {
- Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
- false, false, 0);
- } else {
- ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
-
- // Sparc is big endian, so add an offset based on the ObjectVT.
- unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
- FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
- DAG.getConstant(Offset, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
- NULL, 0, ObjectVT, false, false, 0);
- Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
- }
- InVals.push_back(Load);
- }
- ArgOffset += 4;
- break;
- case MVT::f32:
- if (!Ins[i].Used) { // Argument is dead.
- if (CurArgReg < ArgRegEnd) ++CurArgReg;
- InVals.push_back(DAG.getUNDEF(ObjectVT));
- } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
- // FP value is passed in an integer register.
- unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
- MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
- SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
- InVals.push_back(Arg);
- } else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true);
- SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
- false, false, 0);
- InVals.push_back(Load);
- }
- ArgOffset += 4;
- break;
+ if (i == 0 && Ins[i].Flags.isSRet()) {
+ //Get SRet from [%fp+64]
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ InVals.push_back(Arg);
+ continue;
+ }
- case MVT::i64:
- case MVT::f64:
- if (!Ins[i].Used) { // Argument is dead.
- if (CurArgReg < ArgRegEnd) ++CurArgReg;
- if (CurArgReg < ArgRegEnd) ++CurArgReg;
- InVals.push_back(DAG.getUNDEF(ObjectVT));
- } else {
- SDValue HiVal;
- if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
- unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
- MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
- HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
- } else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true);
- SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
- false, false, 0);
- }
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+ SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+ assert(i+1 < e);
+ CCValAssign &NextVA = ArgLocs[++i];
SDValue LoVal;
- if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
- unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
- MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
- LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
- } else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
- true);
+ if (NextVA.isMemLoc()) {
+ int FrameIdx = MF.getFrameInfo()->
+ CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+ LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
false, false, 0);
+ } else {
+ unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+ &SP::IntRegsRegClass, dl);
+ LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
-
- // Compose the two halves together into an i64 unit.
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+ SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ if (VA.getLocVT() == MVT::f32)
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+ else if (VA.getLocVT() != MVT::i32) {
+ Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+ DAG.getValueType(VA.getLocVT()));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+ }
+ InVals.push_back(Arg);
+ continue;
+ }
- // If we want a double, do a bit convert.
- if (ObjectVT == MVT::f64)
- WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
+ assert(VA.isMemLoc());
- InVals.push_back(WholeValue);
+ unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::f64);
+ // If it is double-word aligned, just load.
+ if (Offset % 8 == 0) {
+ int FI = MF.getFrameInfo()->CreateFixedObject(8,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ InVals.push_back(Load);
+ continue;
}
- ArgOffset += 8;
- break;
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset+4,
+ true);
+ SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+ SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+ MachinePointerInfo(),
+ false, false, 0);
+
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ InVals.push_back(WholeValue);
+ continue;
+ }
+
+ int FI = MF.getFrameInfo()->CreateFixedObject(4,
+ Offset,
+ true);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue Load;
+ if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+ Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+ MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+ // Sparc is big endian, so add an offset based on the value type's size.
+ unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ MachinePointerInfo(),
+ VA.getValVT(), false, false, 0);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
}
+ InVals.push_back(Load);
+ }
+
+ if (MF.getFunction()->hasStructRetAttr()) {
+ // Copy the SRet argument to SRetReturnReg.
+ SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+ unsigned Reg = SFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+ SFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+ const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
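+ // The first six argument words have reserved slots at %fp+68..%fp+91; any
+ // registers not consumed by fixed arguments are spilled there so va_arg
+ // sees one contiguous area.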
+ if (NumAllocated == 6)
+ ArgOffset += StackOffset;
+ else {
+ assert(!ArgOffset);
+ ArgOffset = 68+4*NumAllocated;
+ }
+
// Remember the vararg offset for the va_start implementation.
FuncInfo->setVarArgsFrameOffset(ArgOffset);
@@ -243,7 +323,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
- OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+ MachinePointerInfo(),
false, false, 0));
ArgOffset += 4;
}
@@ -270,191 +351,180 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sparc target does not yet support tail call optimization.
isTailCall = false;
-#if 0
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs);
+ CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
- // FIXME: We can't use this until f64 is known to take two GPRs.
-#else
- (void)CC_Sparc32;
-
- // Count the size of the outgoing arguments.
- unsigned ArgsSize = 0;
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- switch (Outs[i].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unknown value type!");
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::f32:
- ArgsSize += 4;
- break;
- case MVT::i64:
- case MVT::f64:
- ArgsSize += 8;
- break;
- }
- }
- if (ArgsSize > 4*6)
- ArgsSize -= 4*6; // Space for first 6 arguments is prereserved.
- else
- ArgsSize = 0;
-#endif
// Keep stack frames 8-byte aligned.
ArgsSize = (ArgsSize+7) & ~7;
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ // Create local copies for byval args.
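+ // The 32-bit SPARC ABI passes aggregates by reference, so make a caller-side
+ // copy of each byval argument and pass the address of the copy instead.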
+ SmallVector<SDValue, 8> ByValArgs;
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Arg = OutVals[i];
+ unsigned Size = Flags.getByValSize();
+ unsigned Align = Flags.getByValAlign();
+
+ int FI = MFI->CreateStackObject(Size, Align, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+ false, // isVolatile
+ (Size <= 32), // AlwaysInline if size <= 32
+ MachinePointerInfo(), MachinePointerInfo());
+ ByValArgs.push_back(FIPtr);
+ }
+
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
-#if 0
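+ // %sp+92 is where stack-passed arguments start on 32-bit SPARC: 64 bytes of
+ // register window save area, 4 bytes for the struct-return slot, and 24 bytes
+ // of home slots for the six register-passed words.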
+ const unsigned StackOffset = 92;
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
+ SDValue Arg = OutVals[realArgIdx];
+
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+ // Use the local copy if it is a byval arg.
+ if (Flags.isByVal())
+ Arg = ByValArgs[byvalArgIdx++];
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
- // Arguments that can be passed on register must be kept at
- // RegsToPass vector
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (Flags.isSRet()) {
+ assert(VA.needsCustom());
+ // Store the SRet argument at %sp+64.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(64);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
continue;
}
- assert(VA.isMemLoc());
-
- // Create a store off the stack pointer for this argument.
- SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- // FIXME: VERIFY THAT 68 IS RIGHT.
- SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
- PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
- false, false, 0));
- }
-
-#else
- static const unsigned ArgRegs[] = {
- SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
- };
- unsigned ArgOffset = 68;
-
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- SDValue Val = OutVals[i];
- EVT ObjectVT = Outs[i].VT;
- SDValue ValToStore(0, 0);
- unsigned ObjSize;
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unhandled argument type!");
- case MVT::i32:
- ObjSize = 4;
-
- if (RegsToPass.size() >= 6) {
- ValToStore = Val;
- } else {
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
- }
- break;
- case MVT::f32:
- ObjSize = 4;
- if (RegsToPass.size() >= 6) {
- ValToStore = Val;
- } else {
- // Convert this to a FP value in an int reg.
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
- }
- break;
- case MVT::f64: {
- ObjSize = 8;
- if (RegsToPass.size() >= 6) {
- ValToStore = Val; // Whole thing is passed in memory.
- break;
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::f64);
+
+ if (VA.isMemLoc()) {
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // If it is double-word aligned, just store.
+ if (Offset % 8 == 0) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ continue;
+ }
}
- // Break into top and bottom parts by storing to the stack and loading
- // out the parts as integers. Top part goes in a reg.
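+ // Not double-word aligned: split the f64 into two i32 halves by bouncing it
+ // through a stack temporary, then place each half according to its location.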
SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Val, StackPtr, NULL, 0,
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Arg, StackPtr, MachinePointerInfo(),
false, false, 0);
// Sparc is big-endian, so the high part comes first.
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getIntPtrConstant(4));
// Load the low part.
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
- false, false, 0);
-
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
- if (RegsToPass.size() >= 6) {
- ValToStore = Lo;
- ArgOffset += 4;
- ObjSize = 4;
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ assert(i+1 != e);
+ CCValAssign &NextVA = ArgLocs[++i];
+ if (NextVA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ } else {
+ // Store the low part on the stack.
+ unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
} else {
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ unsigned Offset = VA.getLocMemOffset() + StackOffset;
+ // Store the high part.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ // Store the low part.
+ PtrOff = DAG.getIntPtrConstant(Offset+4);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
}
- break;
+ continue;
}
- case MVT::i64: {
- ObjSize = 8;
- if (RegsToPass.size() >= 6) {
- ValToStore = Val; // Whole thing is passed in memory.
- break;
- }
- // Split the value into top and bottom part. Top part goes in a reg.
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
- DAG.getConstant(1, MVT::i32));
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
- DAG.getConstant(0, MVT::i32));
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
- if (RegsToPass.size() >= 6) {
- ValToStore = Lo;
- ArgOffset += 4;
- ObjSize = 4;
- } else {
- RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ // Arguments that can be passed in a register must be kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ if (VA.getLocVT() != MVT::f32) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
}
- break;
- }
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
}
- if (ValToStore.getNode()) {
- SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
- SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
- PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore,
- PtrOff, NULL, 0,
- false, false, 0));
- }
- ArgOffset += ObjSize;
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
}
-#endif
+
 // Emit all stores, make sure they occur before any copies into physregs.
if (!MemOpChains.empty())
@@ -484,11 +554,22 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
- std::vector<EVT> NodeTys;
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
- SDValue Ops[] = { Chain, Callee, InFlag };
- Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+ // Returns a chain & a flag for retval copy to use
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
@@ -610,8 +691,8 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
@@ -701,6 +782,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::ITOF: return "SPISD::ITOF";
case SPISD::CALL: return "SPISD::CALL";
case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+ case SPISD::FLUSHW: return "SPISD::FLUSHW";
}
}
@@ -756,7 +839,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
}
}
-SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
// FIXME there isn't really any debug info here
@@ -765,16 +848,16 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
+
SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
getPointerTy());
SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
- SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, NULL, 0, false, false, 0);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -786,16 +869,16 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
- SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
getPointerTy());
SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, NULL, 0, false, false, 0);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ AbsAddr, MachinePointerInfo(), false, false, 0);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -803,13 +886,13 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
// Convert the fp value to integer in an FP register.
assert(Op.getValueType() == MVT::i32);
Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
assert(Op.getOperand(0).getValueType() == MVT::i32);
- SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
// Convert the int value to FP in an FP register.
return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
}
@@ -832,13 +915,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
if (LHS.getValueType() == MVT::i32) {
std::vector<EVT> VTs;
VTs.push_back(MVT::i32);
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
Opc = SPISD::BRICC;
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
Opc = SPISD::BRFCC;
}
@@ -863,13 +946,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
if (LHS.getValueType() == MVT::i32) {
std::vector<EVT> VTs;
VTs.push_back(LHS.getValueType()); // subcc returns a value
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue Ops[2] = { LHS, RHS };
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
Opc = SPISD::SELECT_ICC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
Opc = SPISD::SELECT_FCC;
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
}
@@ -891,8 +974,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
MVT::i32));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
@@ -902,27 +985,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
DebugLoc dl = Node->getDebugLoc();
- SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0,
- false, false, 0);
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ MachinePointerInfo(SV), false, false, 0);
// Increment the pointer, VAList, to the next vaarg
SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
DAG.getConstant(VT.getSizeInBits()/8,
MVT::i32));
// Store the incremented VAList to the legalized pointer
InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
- VAListPtr, SV, 0, false, false, 0);
+ VAListPtr, MachinePointerInfo(SV), false, false, 0);
// Load the actual argument out of the pointer VAList, unless this is an
// f64 load.
if (VT != MVT::f64)
- return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+ false, false, 0);
// Otherwise, load it as i64, then do a bitconvert.
- SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0,
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
false, false, 0);
// Bit-Convert the value to f64.
SDValue Ops[2] = {
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+ DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
V.getValue(1)
};
return DAG.getMergeValues(Ops, 2, dl);
@@ -947,13 +1031,82 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
}
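+// getFLUSHW - Build a FLUSHW node that forces every in-use register window out
+// to its stack save area, so the frame/return-address lowerings below can walk
+// parent frames through memory.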
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = DAG.getNode(SPISD::FLUSHW,
+ dl, MVT::Other, DAG.getEntryNode());
+ return Chain;
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned FrameReg = SP::I6;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue FrameAddr;
+ if (depth == 0)
+ FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ else {
+ // Flush the register windows first so their values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+
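+ // The parent's frame pointer (%i6) is saved at offset 56 of each frame's
+ // 64-byte register window save area; chase it 'depth' times.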
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ FrameAddr, DAG.getIntPtrConstant(56));
+ FrameAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned RetReg = SP::I7;
+
+ uint64_t depth = Op.getConstantOperandVal(0);
+
+ SDValue RetAddr;
+ if (depth == 0)
+ RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+ else {
+ // Flush the register windows first so their values are on the stack.
+ SDValue Chain = getFLUSHW(Op, DAG);
+ RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+
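+ // Walk the saved frame pointers at offset 56; on the last step read the
+ // saved return address (%i7) at offset 60 instead.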
+ for (uint64_t i = 0; i != depth; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD,
+ dl, MVT::i32,
+ RetAddr,
+ DAG.getIntPtrConstant((i == depth-1)?60:56));
+ RetAddr = DAG.getLoad(MVT::i32, dl,
+ Chain,
+ Ptr,
+ MachinePointerInfo(), false, false, 0);
+ }
+ }
+ return RetAddr;
+}
+
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- // Frame & Return address. Currently unimplemented
- case ISD::RETURNADDR: return SDValue();
- case ISD::FRAMEADDR: return SDValue();
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress:
llvm_unreachable("TLS not implemented for Sparc.");
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -1009,6 +1162,8 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
@@ -1021,8 +1176,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
// copy0MBB:
// %FalseValue = ...
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index db39e08..849e401 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -36,7 +36,8 @@ namespace llvm {
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
- GLOBAL_BASE_REG // Global base reg for PIC
+ GLOBAL_BASE_REG, // Global base reg for PIC
+ FLUSHW // FLUSH register windows to stack
};
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 7ede8e7..afa3c1f 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -66,15 +66,200 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
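+// IsIntegerCC - Integer condition codes are assumed to precede the FP ones in
+// the SPCC enumeration, with ICC_VC as the last integer code.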
+static bool IsIntegerCC(unsigned CC)
+{
+ return (CC <= SPCC::ICC_VC);
+}
+
+
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
+{
+ switch(CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case SPCC::ICC_NE: return SPCC::ICC_E;
+ case SPCC::ICC_E: return SPCC::ICC_NE;
+ case SPCC::ICC_G: return SPCC::ICC_LE;
+ case SPCC::ICC_LE: return SPCC::ICC_G;
+ case SPCC::ICC_GE: return SPCC::ICC_L;
+ case SPCC::ICC_L: return SPCC::ICC_GE;
+ case SPCC::ICC_GU: return SPCC::ICC_LEU;
+ case SPCC::ICC_LEU: return SPCC::ICC_GU;
+ case SPCC::ICC_CC: return SPCC::ICC_CS;
+ case SPCC::ICC_CS: return SPCC::ICC_CC;
+ case SPCC::ICC_POS: return SPCC::ICC_NEG;
+ case SPCC::ICC_NEG: return SPCC::ICC_POS;
+ case SPCC::ICC_VC: return SPCC::ICC_VS;
+ case SPCC::ICC_VS: return SPCC::ICC_VC;
+
+ case SPCC::FCC_U: return SPCC::FCC_O;
+ case SPCC::FCC_O: return SPCC::FCC_U;
+ case SPCC::FCC_G: return SPCC::FCC_LE;
+ case SPCC::FCC_LE: return SPCC::FCC_G;
+ case SPCC::FCC_UG: return SPCC::FCC_ULE;
+ case SPCC::FCC_ULE: return SPCC::FCC_UG;
+ case SPCC::FCC_L: return SPCC::FCC_GE;
+ case SPCC::FCC_GE: return SPCC::FCC_L;
+ case SPCC::FCC_UL: return SPCC::FCC_UGE;
+ case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_LG: return SPCC::FCC_UE;
+ case SPCC::FCC_UE: return SPCC::FCC_LG;
+ case SPCC::FCC_NE: return SPCC::FCC_E;
+ case SPCC::FCC_E: return SPCC::FCC_NE;
+ }
+}
+
+
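+// AnalyzeBranch - Walk the terminators of MBB bottom-up; on success return
+// false and fill in TBB/FBB/Cond, on an unanalyzable branch return true.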
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ // When we see a non-terminator, we are done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // The terminator is not a branch.
+ if (!I->getDesc().isBranch())
+ return true;
+
+ // Handle unconditional branches.
+ if (I->getOpcode() == SP::BA) {
+ UnCondBrIter = I;
+
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ UnCondBrIter = MBB.end();
+ continue;
+ }
+
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ unsigned Opcode = I->getOpcode();
+ if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+ return true; // Unknown opcode.
+
+ SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+ if (Cond.empty()) {
+ MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+ if (AllowModify && UnCondBrIter != MBB.end() &&
+ MBB.isLayoutSuccessor(TargetBB)) {
+
+ // Transform the code
+ //
+ // brCC L1
+ // ba L2
+ // L1:
+ // ..
+ // L2:
+ //
+ // into
+ //
+ // brnCC L2
+ // L1:
+ // ...
+ // L2:
+ //
+ BranchCode = GetOppositeBranchCondition(BranchCode);
+ MachineBasicBlock::iterator OldInst = I;
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+ .addMBB(TargetBB);
+ MBB.addSuccessor(TargetBB);
+ OldInst->eraseFromParent();
+ UnCondBrIter->eraseFromParent();
+
+ UnCondBrIter = MBB.end();
+ I = MBB.end();
+ continue;
+ }
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+ // FIXME: Handle subsequent conditional branches.
+ // For now, we can't handle multiple conditional branches.
+ return true;
+ }
+ return false;
+}
+
unsigned
SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL)const{
- // Can only insert uncond branches so far.
- assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
- return 1;
+ DebugLoc DL) const {
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "Sparc branch conditions should have one component!");
+
+ if (Cond.empty()) {
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned CC = Cond[0].getImm();
+
+ if (IsIntegerCC(CC))
+ BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+ else
+ BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+ while (I != MBB.begin()) {
+ --I;
+
+ if (I->isDebugValue())
+ continue;
+
+ if (I->getOpcode() != SP::BA
+ && I->getOpcode() != SP::BCOND
+ && I->getOpcode() != SP::FBCOND)
+ break; // Not a branch
+
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+ return Count;
}
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index c00bd21..b2d24f5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -58,8 +58,15 @@ public:
/// any side effects other than storing to the stack slot.
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
-
-
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 467ed48..1072323 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -95,10 +95,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
def SDTSPITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
-def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
-def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
-def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
-def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
@@ -106,8 +106,8 @@ def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
-def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
-def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
@@ -115,16 +115,20 @@ def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
def call : SDNode<"SPISD::CALL", SDT_SPCall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"SPISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
+ [SDNPHasChain]>;
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
@@ -204,7 +208,7 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSP<outs, ins, asmstr, pattern>;
// GETPCX for PIC
-let Defs = [O7], Uses = [O7] in {
+let Defs = [O7] in {
def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
}
@@ -217,6 +221,17 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
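+// Flush register windows to the stack: the V9 FLUSHW instruction where
+// available, otherwise the V8 "ta 3" (flush-windows) software trap.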
+let hasSideEffects = 1, mayStore = 1 in {
+ let rd = 0, rs1 = 0, rs2 = 0 in
+ def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+ "flushw",
+ [(flushw)]>, Requires<[HasV9]>;
+ let rd = 0, rs1 = 1, simm13 = 3 in
+ def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+ "ta 3",
+ [(flushw)]>;
+}
+
// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
// fpmover pass.
let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
@@ -233,32 +248,39 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence. This has to handle all
// permutations of selection between i32/f32/f64 on ICC and FCC.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
+ // Expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in {
def SELECT_CC_Int_ICC
: Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
"; SELECT_CC_Int_ICC PSEUDO!",
[(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
imm:$Cond))]>;
- def SELECT_CC_Int_FCC
- : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
- "; SELECT_CC_Int_FCC PSEUDO!",
- [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
- imm:$Cond))]>;
def SELECT_CC_FP_ICC
: Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
"; SELECT_CC_FP_ICC PSEUDO!",
[(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
imm:$Cond))]>;
- def SELECT_CC_FP_FCC
- : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
- "; SELECT_CC_FP_FCC PSEUDO!",
- [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
- imm:$Cond))]>;
+
def SELECT_CC_DFP_ICC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
[(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
def SELECT_CC_DFP_FCC
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
@@ -272,6 +294,9 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+
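+ // "ret" is the synthetic jmpl %i7+8, %g0: return through the register window.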
+ let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in
+ def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>;
}
// Section B.1 - Load Integer Instructions, p. 90
@@ -436,28 +461,34 @@ def LEA_ADDri : F3_2<2, 0b000000,
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
-defm ADDX : F3_12<"addx", 0b001000, adde>;
+let Uses = [ICC] in
+ defm ADDX : F3_12<"addx", 0b001000, adde>;
// Section B.15 - Subtract Instructions, p. 110
defm SUB : F3_12 <"sub" , 0b000100, sub>;
-defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+let Uses = [ICC] in
+ defm SUBX : F3_12 <"subx" , 0b001100, sube>;
-let Defs = [ICC] in {
+let Defs = [ICC] in
defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+let Uses = [ICC], Defs = [ICC] in
def SUBXCCrr: F3_1<2, 0b011100,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
"subxcc $b, $c, $dst", []>;
-}
-// Section B.18 - Multiply Instructions, p. 113
-defm UMUL : F3_12np<"umul", 0b001010>;
-defm SMUL : F3_12 <"smul", 0b001011, mul>;
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+ defm UMUL : F3_12np<"umul", 0b001010>;
+ defm SMUL : F3_12 <"smul", 0b001011, mul>;
+}
// Section B.19 - Divide Instructions, p. 115
-defm UDIV : F3_12np<"udiv", 0b001110>;
-defm SDIV : F3_12np<"sdiv", 0b001111>;
+let Defs = [Y] in {
+ defm UDIV : F3_12np<"udiv", 0b001110>;
+ defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
// Section B.20 - SAVE and RESTORE, p. 117
defm SAVE : F3_12np<"save" , 0b111100>;
@@ -504,11 +535,12 @@ let Uses = [FCC] in
// Section B.24 - Call and Link Instruction, p. 125
// This is the only Format 1 instruction
-let Uses = [O0, O1, O2, O3, O4, O5],
+let Uses = [O6],
hasDelaySlot = 1, isCall = 1,
Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
- D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in {
- def CALL : InstSP<(outs), (ins calltarget:$dst),
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ ICC, FCC, Y] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
"call $dst", []> {
bits<30> disp;
let op = 1;
@@ -517,28 +549,30 @@ let Uses = [O0, O1, O2, O3, O4, O5],
// indirect calls
def JMPLrr : F3_1<2, 0b111000,
- (outs), (ins MEMrr:$ptr),
+ (outs), (ins MEMrr:$ptr, variable_ops),
"call $ptr",
[(call ADDRrr:$ptr)]>;
def JMPLri : F3_2<2, 0b111000,
- (outs), (ins MEMri:$ptr),
+ (outs), (ins MEMri:$ptr, variable_ops),
"call $ptr",
[(call ADDRri:$ptr)]>;
}
// Section B.28 - Read State Register Instructions
-def RDY : F3_1<2, 0b101000,
- (outs IntRegs:$dst), (ins),
- "rd %y, $dst", []>;
+let Uses = [Y] in
+ def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
// Section B.29 - Write State Register Instructions
-def WRYrr : F3_1<2, 0b110000,
- (outs), (ins IntRegs:$b, IntRegs:$c),
- "wr $b, $c, %y", []>;
-def WRYri : F3_2<2, 0b110000,
- (outs), (ins IntRegs:$b, i32imm:$c),
- "wr $b, $c, %y", []>;
-
+let Defs = [Y] in {
+ def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+ def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+}
// Convert Integer to Floating-point Instructions, p. 141
def FITOS : F3_3<2, 0b110100, 0b011000100,
(outs FPRegs:$dst), (ins FPRegs:$src),
@@ -660,48 +694,57 @@ let Defs = [FCC] in {
let Predicates = [HasV9], Constraints = "$T = $dst" in {
// Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
// FIXME: Add instruction encodings for the JIT some day.
- def MOVICCrr
- : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
- "mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
- def MOVICCri
- : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
- "mov$cc %icc, $F, $dst",
- [(set IntRegs:$dst,
- (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
- def MOVFCCrr
- : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
- "mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
- def MOVFCCri
- : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
- "mov$cc %fcc0, $F, $dst",
- [(set IntRegs:$dst,
- (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
- def FMOVS_ICC
- : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
- "fmovs$cc %icc, $F, $dst",
- [(set FPRegs:$dst,
- (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
- def FMOVD_ICC
- : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
- "fmovd$cc %icc, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
- def FMOVS_FCC
- : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
- "fmovs$cc %fcc0, $F, $dst",
- [(set FPRegs:$dst,
- (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
- def FMOVD_FCC
- : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
- "fmovd$cc %fcc0, $F, $dst",
- [(set DFPRegs:$dst,
- (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ let Uses = [ICC] in {
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [ICC] in {
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
+
+ let Uses = [FCC] in {
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ }
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index e34c131..0b74308 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -24,16 +24,23 @@ namespace llvm {
/// VarArgsFrameOffset - Frame offset to start of varargs area.
int VarArgsFrameOffset;
+ /// SRetReturnReg - Holds the virtual register into which the sret
+ /// argument is passed.
+ unsigned SRetReturnReg;
public:
- SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+ SparcMachineFunctionInfo()
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
explicit SparcMachineFunctionInfo(MachineFunction &MF)
- : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+ : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
};
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index c85db20..b010d04 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -52,10 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
- return false;
-}
-
void SparcRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -112,55 +108,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void SparcRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
-void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the number of bytes to allocate from the FrameInfo
- int NumBytes = (int) MFI->getStackSize();
-
- // Emit the correct save instruction based on the number of bytes in
- // the frame. Minimum stack frame size according to V8 ABI is:
- // 16 words for register window spill
- // 1 word for address of returned aggregate-value
- // + 6 words for passing parameters on the stack
- // ----------
- // 23 words * 4 bytes per word = 92 bytes
- NumBytes += 92;
-
- // Round up to next doubleword boundary -- a double-word boundary
- // is required by the ABI.
- NumBytes = (NumBytes + 7) & ~7;
- NumBytes = -NumBytes;
-
- if (NumBytes >= -4096) {
- BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
- .addReg(SP::O6).addImm(NumBytes);
- } else {
- // Emit this the hard way. This clobbers G1 which we always know is
- // available here.
- unsigned OffHi = (unsigned)NumBytes >> 10U;
- BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
- .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
- BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
- .addReg(SP::O6).addReg(SP::G1);
- }
-}
-
-void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- DebugLoc dl = MBBI->getDebugLoc();
- assert(MBBI->getOpcode() == SP::RETL &&
- "Can only put epilog before 'retl' instruction!");
- BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
- .addReg(SP::G0);
-}
-
unsigned SparcRegisterInfo::getRARegister() const {
return SP::I7;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 020ce56..d930b53 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -26,16 +26,14 @@ class Type;
struct SparcRegisterInfo : public SparcGenRegisterInfo {
SparcSubtarget &Subtarget;
const TargetInstrInfo &TII;
-
+
SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
- /// Code Generation virtual methods...
+ /// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -45,9 +43,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index fede929..5ef4dae 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -45,6 +45,9 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
def ICC : SparcCtrlReg<"ICC">;
def FCC : SparcCtrlReg<"FCC">;
+// Y register
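+// (holds the high half of multiply results and the dividend for divides)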
+def Y : SparcCtrlReg<"Y">;
+
// Integer registers
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index b58d6ba..b84eab5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "Sparc.h"
#include "SparcMCAsmInfo.h"
#include "SparcTargetMachine.h"
-#include "Sparc.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
@@ -34,8 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
- TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+ FrameLowering(Subtarget) {
}
bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index 322c82a..c4bb6bd 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,13 +14,14 @@
#ifndef SPARCTARGETMACHINE_H
#define SPARCTARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "SparcInstrInfo.h"
-#include "SparcSubtarget.h"
#include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
#include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -30,13 +31,15 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcInstrInfo InstrInfo;
- TargetFrameInfo FrameInfo;
+ SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
virtual const SparcRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
diff --git a/contrib/llvm/lib/Target/SubtargetFeature.cpp b/contrib/llvm/lib/Target/SubtargetFeature.cpp
index b35190a..3cf95b5 100644
--- a/contrib/llvm/lib/Target/SubtargetFeature.cpp
+++ b/contrib/llvm/lib/Target/SubtargetFeature.cpp
@@ -18,6 +18,7 @@
#include <algorithm>
#include <cassert>
#include <cctype>
+#include <cstdlib>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -162,7 +163,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
<< "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
- exit(1);
+ std::exit(1);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index d7ac8f5..fd4d8b7 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -55,9 +55,15 @@ namespace {
void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
O << (int16_t)MI->getOperand(OpNum).getImm();
}
+ void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint16_t)MI->getOperand(OpNum).getImm();
+ }
void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
O << (int32_t)MI->getOperand(OpNum).getImm();
}
+ void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
+ O << (uint32_t)MI->getOperand(OpNum).getImm();
+ }
void printInstruction(const MachineInstr *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 0000000..2ad84a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,386 @@
+//===-- SystemZFrameLowering.cpp - SystemZ Frame Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
+ // Fill the spill offsets map
+ static const unsigned SpillOffsTab[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 }
+ };
+
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+ for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+ RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int64_t NumBytes, const TargetInstrInfo &TII) {
+ unsigned Opc; uint64_t Chunk;
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+ if (Offset >= (1LL << 15) - 1) {
+ Opc = SystemZ::ADD64ri32;
+ Chunk = (1LL << 31) - 1;
+ } else {
+ Opc = SystemZ::ADD64ri16;
+ Chunk = (1LL << 15) - 1;
+ }
+
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+ .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ // Note that area for callee-saved stuff is already allocated, thus we need to
+ // 'undo' the stack movement.
+ uint64_t StackSize = MFI->getStackSize();
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::MOV64mr ||
+ MBBI->getOpcode() == SystemZ::MOV64mrm))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ // adjust stack pointer: R15 -= numbytes
+ if (StackSize || MFI->hasCalls()) {
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+ emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+ }
+
+ if (hasFP(MF)) {
+ // Update R11 with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+
+ }
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SystemZInstrInfo &TII =
+ *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ switch (RetOpcode) {
+ case SystemZ::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ // Note that area for callee-saved stuff is already allocated, thus we need to
+ // 'undo' the stack movement.
+ uint64_t StackSize =
+ MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
+
+ // Skip the final terminator instruction.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ --MBBI;
+ if (!PI->getDesc().isTerminator())
+ break;
+ }
+
+  // When the callee-saved restores were emitted, the stack frame was not yet
+  // finalized (and thus the stack size was unknown). Adjust the offset now
+  // that the full stack size is known.
+ if (StackSize || MFI->hasCalls()) {
+ assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+ MBBI->getOpcode() == SystemZ::MOV64rm) &&
+ "Expected to see callee-save register restore code");
+ assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+ "Invalid stack frame calculation!");
+
+ unsigned i = 0;
+ MachineInstr &MI = *MBBI;
+ while (!MI.getOperand(i).isImm()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Unexpected restore code!");
+ }
+
+ uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+ // If Offset does not fit into 20-bit signed displacement field we need to
+ // emit some additional code...
+ if (Offset > 524287) {
+ // Fold the displacement into load instruction as much as possible.
+ NumBytes = Offset - 524287;
+ Offset = 524287;
+ emitSPUpdate(MBB, MBBI, NumBytes, TII);
+ }
+
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SystemZMachineFunctionInfo *SystemZMFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Fixed objects are really located in the "previous" frame.
+ if (FI < 0)
+ StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+ Offset += StackSize - getOffsetOfLocalArea();
+
+ // Skip the register save area if we generated the stack frame.
+ if (StackSize || MFI->hasCalls())
+ Offset -= getOffsetOfLocalArea();
+
+ return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned CalleeFrameSize = 0;
+
+ // Scan the callee-saved and find the bounds of register spill area.
+ unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
+ unsigned Offset = RegSpillOffsets[Reg];
+ CalleeFrameSize += 8;
+ if (StartOffset > Offset) {
+ LowReg = Reg; StartOffset = Offset;
+ }
+ if (EndOffset < Offset) {
+ HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+ }
+ }
+ }
+
+ // Save information for epilogue inserter.
+ MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+ MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+ // Save GPRs
+ if (StartOffset) {
+    // Build a store instruction. Use the STORE MULTIPLE instruction if there
+    // are multiple registers to store, otherwise just STORE.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+ // Add store operands.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+ MIB.addReg(LowReg, RegState::Kill);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Kill);
+
+ // Do a second scan adding regs as being killed by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitKill);
+ }
+ }
+
+ // Save FPRs
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Restore FP registers
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (SystemZ::FP64RegClass.contains(Reg))
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, TRI);
+ }
+
+ // Restore GP registers
+ unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+ unsigned StartOffset = RegSpillOffsets[LowReg];
+
+ if (StartOffset) {
+    // Build a load instruction. Use the LOAD MULTIPLE instruction if there
+    // are multiple registers to load, otherwise just LOAD.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+ SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+ MIB.addReg(LowReg, RegState::Define);
+ if (LowReg != HighReg)
+ MIB.addReg(HighReg, RegState::Define);
+
+ MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+ if (LowReg == HighReg)
+ MIB.addReg(0);
+
+ // Do a second scan adding regs as being defined by instruction
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg != LowReg && Reg != HighReg)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+  // Determine whether R15/R14 will ever be clobbered inside the function, and
+  // if so, mark them as callee-saved.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether high FPRs are ever used; if so, we need to save R15 as
+  // well.
+ static const unsigned HighFPRs[] = {
+ SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
+ SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+ };
+
+ bool HighFPRsUsed = false;
+ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+ HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+ if (FFI->hasCalls())
+ /* FIXME: function is varargs */
+ /* FIXME: function grabs RA */
+ /* FIXME: function calls eh_return */
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ if (HighFPRsUsed ||
+ FFI->hasCalls() ||
+ FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+ FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+ /* FIXME: function is varargs */)
+ MRI.setPhysRegUsed(SystemZ::R15D);
+}
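Illustration (not part of the patch): a minimal sketch, assuming only the immediate limits visible in the prologue/epilogue code above, of how emitSPUpdate splits a large stack adjustment into ADD64ri16/ADD64ri32-sized chunks. The helper name chunkSPAdjustment is made up for this sketch; it returns the sequence of immediates the real code would feed to BuildMI.

    // Hedged sketch: mirrors emitSPUpdate's chunking with plain integers
    // instead of MachineInstrs. Limits are taken from the hunk above.
    #include <cstdint>
    #include <vector>

    std::vector<int64_t> chunkSPAdjustment(int64_t NumBytes) {
      bool isSub = NumBytes < 0;
      uint64_t Offset = isSub ? -NumBytes : NumBytes;
      // Large adjustments use the 32-bit immediate form, small ones the 16-bit form.
      uint64_t Chunk = (Offset >= (1ULL << 15) - 1) ? (1ULL << 31) - 1
                                                    : (1ULL << 15) - 1;
      std::vector<int64_t> Imms;
      while (Offset) {
        uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
        Imms.push_back(isSub ? -(int64_t)ThisVal : (int64_t)ThisVal);
        Offset -= ThisVal;
      }
      return Imms;
    }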
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 0000000..1284b68
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//=- SystemZFrameLowering.h - Define frame lowering for z/System -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+ class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+protected:
+ const SystemZSubtarget &STI;
+
+public:
+ explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const;
+
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
+ bool hasFP(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index ed290ca..2186ff1 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -120,18 +120,17 @@ namespace {
#include "SystemZGenDAGISel.inc"
private:
- bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+ bool SelectAddrRI12Only(SDValue& Addr,
SDValue &Base, SDValue &Disp);
- bool SelectAddrRI12(SDNode *Op, SDValue& Addr,
+ bool SelectAddrRI12(SDValue& Addr,
SDValue &Base, SDValue &Disp,
bool is12BitOnly = false);
- bool SelectAddrRI(SDNode *Op, SDValue& Addr,
- SDValue &Base, SDValue &Disp);
- bool SelectAddrRRI12(SDNode *Op, SDValue Addr,
+ bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
+ bool SelectAddrRRI12(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectAddrRRI20(SDNode *Op, SDValue Addr,
+ bool SelectAddrRRI20(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectLAAddr(SDNode *Op, SDValue Addr,
+ bool SelectLAAddr(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index);
SDNode *Select(SDNode *Node);
@@ -142,8 +141,6 @@ namespace {
bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
bool is12Bit, unsigned Depth = 0);
bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
- bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
- bool is12Bit);
};
} // end anonymous namespace
@@ -355,12 +352,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
/// Returns true if the address can be represented by a base register plus
/// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
SDValue &Base, SDValue &Disp) {
- return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
+ return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
}
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
SDValue &Base, SDValue &Disp,
bool is12BitOnly) {
SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
@@ -410,7 +407,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr,
/// Returns true if the address can be represented by a base register plus
/// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
SDValue &Base, SDValue &Disp) {
SystemZRRIAddressMode AM(/*isRI*/true);
bool Done = false;
@@ -453,7 +450,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr,
/// Returns true if the address can be represented by a base register plus
/// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM20, AM12;
bool Done = false;
@@ -502,7 +499,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr,
/// Returns true if the address can be represented by a base register plus
/// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM;
bool Done = false;
@@ -546,7 +543,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr,
/// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LA/LAY instruction.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr,
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
SDValue &Base, SDValue &Disp, SDValue &Index) {
SystemZRRIAddressMode AM;
@@ -583,7 +580,7 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Disp, SDValue &Index) {
if (ISD::isNON_EXTLoad(N.getNode()) &&
IsLegalToFold(N, P, P, OptLevel))
- return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index);
+ return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
return false;
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 67f739f..d694f2e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -147,8 +147,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setOperationAction(ISD::FREM, MVT::f64, Expand);
// We have only 64-bit bitconverts
- setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
- setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
@@ -341,7 +341,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// from this parameter
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
}
@@ -377,8 +377,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
-
MachineFunction &MF = DAG.getMachineFunction();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
// Offset to first argument stack slot.
const unsigned FirstArgOffset = 160;
@@ -431,7 +431,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
if (StackPtr.getNode() == 0)
StackPtr =
DAG.getCopyFromReg(Chain, dl,
- (RegInfo->hasFP(MF) ?
+ (TFI->hasFP(MF) ?
SystemZ::R11D : SystemZ::R15D),
getPointerTy());
@@ -441,7 +441,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
DAG.getIntPtrConstant(Offset));
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), Offset,
+ MachinePointerInfo(),
false, false, 0));
}
}
@@ -471,7 +471,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
@@ -710,7 +710,7 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
SDValue SystemZCC;
SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SmallVector<SDValue, 4> Ops;
Ops.push_back(TrueV);
Ops.push_back(FalseV);
@@ -747,7 +747,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
if (ExtraLoadRequired)
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index fa87061..2f2ef08 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -115,9 +115,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
if (TID.mayStore())
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, Offset,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI), Offset),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 367bed3..be52803 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -28,28 +28,6 @@ using namespace llvm;
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
RI(tm, *this), TM(tm) {
- // Fill the spill offsets map
- static const unsigned SpillOffsTab[][2] = {
- { SystemZ::R2D, 0x10 },
- { SystemZ::R3D, 0x18 },
- { SystemZ::R4D, 0x20 },
- { SystemZ::R5D, 0x28 },
- { SystemZ::R6D, 0x30 },
- { SystemZ::R7D, 0x38 },
- { SystemZ::R8D, 0x40 },
- { SystemZ::R9D, 0x48 },
- { SystemZ::R10D, 0x50 },
- { SystemZ::R11D, 0x58 },
- { SystemZ::R12D, 0x60 },
- { SystemZ::R13D, 0x68 },
- { SystemZ::R14D, 0x70 },
- { SystemZ::R15D, 0x78 }
- };
-
- RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-
- for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
- RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
}
/// isGVStub - Return true if the GV requires an extra load to get the
@@ -211,134 +189,6 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool
-SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- // Scan the callee-saved and find the bounds of register spill area.
- unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (!SystemZ::FP64RegClass.contains(Reg)) {
- unsigned Offset = RegSpillOffsets[Reg];
- CalleeFrameSize += 8;
- if (StartOffset > Offset) {
- LowReg = Reg; StartOffset = Offset;
- }
- if (EndOffset < Offset) {
- HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
- }
- }
- }
-
- // Save information for epilogue inserter.
- MFI->setCalleeSavedFrameSize(CalleeFrameSize);
- MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
-
- // Save GPRs
- if (StartOffset) {
- // Build a store instruction. Use STORE MULTIPLE instruction if there are many
- // registers to store, otherwise - just STORE.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
- SystemZ::MOV64mr : SystemZ::MOV64mrm)));
-
- // Add store operands.
- MIB.addReg(SystemZ::R15D).addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
- MIB.addReg(LowReg, RegState::Kill);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Kill);
-
- // Do a second scan adding regs as being killed by instruction
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitKill);
- }
- }
-
- // Save FPRs
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg)) {
- MBB.addLiveIn(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, &RI);
- }
- }
-
- return true;
-}
-
-bool
-SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- const TargetRegisterInfo *RegInfo= MF.getTarget().getRegisterInfo();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
-
- // Restore FP registers
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg))
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, &RI);
- }
-
- // Restore GP registers
- unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
- unsigned StartOffset = RegSpillOffsets[LowReg];
-
- if (StartOffset) {
- // Build a load instruction. Use LOAD MULTIPLE instruction if there are many
- // registers to load, otherwise - just LOAD.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
- SystemZ::MOV64rm : SystemZ::MOV64rmm)));
- // Add store operands.
- MIB.addReg(LowReg, RegState::Define);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Define);
-
- MIB.addReg((RegInfo->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D));
- MIB.addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
-
- // Do a second scan adding regs as being defined by instruction
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
- }
-
- return true;
-}
-
bool SystemZInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid Xbranch condition!");
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index c248f24..6cb7200 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -50,7 +50,6 @@ namespace SystemZII {
class SystemZInstrInfo : public TargetInstrInfoImpl {
const SystemZRegisterInfo RI;
SystemZTargetMachine &TM;
- IndexedMap<unsigned> RegSpillOffsets;
public:
explicit SystemZInstrInfo(SystemZTargetMachine &TM);
@@ -80,15 +79,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 8df07c0..11a39fc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -46,15 +46,15 @@ def SDT_Address : SDTypeProfile<1, 1,
// SystemZ Specific Node Definitions.
//===----------------------------------------------------------------------===//
def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
def SystemZcallseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def SystemZcallseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
@@ -229,19 +229,19 @@ def MOV64ri16 : RII<0x9A7,
[(set GR64:$dst, immSExt16:$src)]>;
def MOV64rill16 : RII<0xFA5,
- (outs GR64:$dst), (ins i64imm:$src),
+ (outs GR64:$dst), (ins u16imm:$src),
"llill\t{$dst, $src}",
[(set GR64:$dst, i64ll16:$src)]>;
def MOV64rilh16 : RII<0xEA5,
- (outs GR64:$dst), (ins i64imm:$src),
+ (outs GR64:$dst), (ins u16imm:$src),
"llilh\t{$dst, $src}",
[(set GR64:$dst, i64lh16:$src)]>;
def MOV64rihl16 : RII<0xDA5,
- (outs GR64:$dst), (ins i64imm:$src),
+ (outs GR64:$dst), (ins u16imm:$src),
"llihl\t{$dst, $src}",
[(set GR64:$dst, i64hl16:$src)]>;
def MOV64rihh16 : RII<0xCA5,
- (outs GR64:$dst), (ins i64imm:$src),
+ (outs GR64:$dst), (ins u16imm:$src),
"llihh\t{$dst, $src}",
[(set GR64:$dst, i64hh16:$src)]>;
@@ -250,10 +250,10 @@ def MOV64ri32 : RILI<0x1C0,
"lgfi\t{$dst, $src}",
[(set GR64:$dst, immSExt32:$src)]>;
def MOV64rilo32 : RILI<0xFC0,
- (outs GR64:$dst), (ins i64imm:$src),
+ (outs GR64:$dst), (ins u32imm:$src),
"llilf\t{$dst, $src}",
[(set GR64:$dst, i64lo32:$src)]>;
-def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src),
+def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
"llihf\t{$dst, $src}",
[(set GR64:$dst, i64hi32:$src)]>;
}
@@ -642,42 +642,42 @@ def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
(implicit PSW)]>;
def AND32rill16 : RII<0xA57,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
"nill\t{$dst, $src2}",
[(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
def AND64rill16 : RII<0xA57,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"nill\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
def AND32rilh16 : RII<0xA56,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
"nilh\t{$dst, $src2}",
[(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
def AND64rilh16 : RII<0xA56,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"nilh\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
def AND64rihl16 : RII<0xA55,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"nihl\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
def AND64rihh16 : RII<0xA54,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"nihh\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
def AND32ri : RILI<0xC0B,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
"nilf\t{$dst, $src2}",
[(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
def AND64rilo32 : RILI<0xC0B,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
"nilf\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
def AND64rihi32 : RILI<0xC0A,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
"nihf\t{$dst, $src2}",
[(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
@@ -707,41 +707,41 @@ def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
// FIXME: Provide proper encoding!
def OR32ri16 : RII<0xA5B,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
"oill\t{$dst, $src2}",
[(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
def OR32ri16h : RII<0xA5A,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
"oilh\t{$dst, $src2}",
[(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
def OR32ri : RILI<0xC0D,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
"oilf\t{$dst, $src2}",
[(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
def OR64rill16 : RII<0xA5B,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"oill\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
def OR64rilh16 : RII<0xA5A,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"oilh\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
def OR64rihl16 : RII<0xA59,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"oihl\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
def OR64rihh16 : RII<0xA58,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
"oihh\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
def OR64rilo32 : RILI<0xC0D,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
"oilf\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
def OR64rihi32 : RILI<0xC0C,
- (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2),
+ (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
"oihf\t{$dst, $src2}",
[(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
index 4f7f70b..2dc7e7b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -14,6 +14,7 @@
#include "SystemZMCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
@@ -24,6 +25,6 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
const MCSection *SystemZMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const{
- return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata(), false);
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
index 0de50fd..8b835cc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -246,6 +246,14 @@ def s16imm : Operand<i32> {
def s16imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
}
+// Unsigned i16
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
// Signed i20
def s20imm : Operand<i32> {
let PrintMethod = "printS20ImmOperand";
@@ -260,6 +268,13 @@ def s32imm : Operand<i32> {
def s32imm64 : Operand<i64> {
let PrintMethod = "printS32ImmOperand";
}
+// Unsigned i32
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+def u32imm64 : Operand<i64> {
+ let PrintMethod = "printU32ImmOperand";
+}
def imm_pcrel : Operand<i64> {
let PrintMethod = "printPCRelImmOperand";
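Illustration (not part of the patch): the new u16imm/u32imm operands and their printU16ImmOperand/printU32ImmOperand printers matter because the llill/nill/oill-style instructions take unsigned immediates, and printing them through a signed cast would turn, e.g., 0xFFFF into -1 in the assembly output. A minimal standalone sketch of the cast difference, using plain iostream rather than raw_ostream:

    #include <cstdint>
    #include <iostream>

    int main() {
      int64_t Imm = 0xFFFF;                // a 16-bit all-ones immediate
      std::cout << (int16_t)Imm << "\n";   // prints -1 (signed printer)
      std::cout << (uint16_t)Imm << "\n";  // prints 65535 (unsigned printer)
      return 0;
    }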
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index f8d3e6a..28f94f4 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -49,49 +49,21 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- if (hasFP(MF))
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF))
Reserved.set(SystemZ::R11D);
Reserved.set(SystemZ::R14D);
Reserved.set(SystemZ::R15D);
return Reserved;
}
-/// needsFP - Return true if the specified function should have a dedicated
-/// frame pointer register. This is true if the function has variable sized
-/// allocas or if frame pointer elimination is disabled.
-bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
void SystemZRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
MBB.erase(I);
}
-int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
- uint64_t StackSize = MFI->getStackSize();
-
- // Fixed objects are really located in the "previous" frame.
- if (FI < 0)
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- Offset += StackSize - TFI.getOffsetOfLocalArea();
-
- // Skip the register save area if we generated the stack frame.
- if (StackSize || MFI->hasCalls())
- Offset -= TFI.getOffsetOfLocalArea();
-
- return Offset;
-}
-
void
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -100,6 +72,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -107,7 +81,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
- unsigned BasePtr = (hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+ unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
// This must be part of a rri or ri operand memory reference. Replace the
// FrameIndex with base register with BasePtr. Add an offset to the
@@ -117,7 +91,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is a either 12-bit unsigned or 20-bit signed integer.
// FIXME: handle "too long" displacements.
int Offset =
- getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+ TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
// Check whether displacement is too long to fit into 12 bit zext field.
MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
@@ -125,178 +99,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Offset);
}
-void
-SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // Determine whether R15/R14 will ever be clobbered inside the function. And
- // if yes - mark it as 'callee' saved.
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Check whether high FPRs are ever used, if yes - we need to save R15 as
- // well.
- static const unsigned HighFPRs[] = {
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
- };
-
- bool HighFPRsUsed = false;
- for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
- HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
-
- if (FFI->hasCalls())
- /* FIXME: function is varargs */
- /* FIXME: function grabs RA */
- /* FIXME: function calls eh_return */
- MRI.setPhysRegUsed(SystemZ::R14D);
-
- if (HighFPRsUsed ||
- FFI->hasCalls() ||
- FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
- FFI->hasVarSizedObjects() // Function calls dynamic alloca's
- /* FIXME: function is varargs */)
- MRI.setPhysRegUsed(SystemZ::R15D);
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- int64_t NumBytes, const TargetInstrInfo &TII) {
- unsigned Opc; uint64_t Chunk;
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
-
- if (Offset >= (1LL << 15) - 1) {
- Opc = SystemZ::ADD64ri32;
- Chunk = (1LL << 31) - 1;
- } else {
- Opc = SystemZ::ADD64ri16;
- Chunk = (1LL << 15) - 1;
- }
-
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
- .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
- // The PSW implicit def is dead.
- MI->getOperand(3).setIsDead();
- Offset -= ThisVal;
- }
-}
-
-void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the number of bytes to allocate from the FrameInfo.
- // Note that area for callee-saved stuff is already allocated, thus we need to
- // 'undo' the stack movement.
- uint64_t StackSize = MFI->getStackSize();
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
-
- // Skip the callee-saved push instructions.
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == SystemZ::MOV64mr ||
- MBBI->getOpcode() == SystemZ::MOV64mrm))
- ++MBBI;
-
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- // adjust stack pointer: R15 -= numbytes
- if (StackSize || MFI->hasCalls()) {
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
- emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
- }
-
- if (hasFP(MF)) {
- // Update R11 with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
- .addReg(SystemZ::R15D);
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(SystemZ::R11D);
-
- }
-}
-
-void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned RetOpcode = MBBI->getOpcode();
-
- switch (RetOpcode) {
- case SystemZ::RET: break; // These are ok
- default:
- assert(0 && "Can only insert epilog into returning blocks");
- }
-
- // Get the number of bytes to allocate from the FrameInfo
- // Note that area for callee-saved stuff is already allocated, thus we need to
- // 'undo' the stack movement.
- uint64_t StackSize =
- MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
- uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
-
- // Skip the final terminator instruction.
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- --MBBI;
- if (!PI->getDesc().isTerminator())
- break;
- }
-
- // During callee-saved restores emission stack frame was not yet finialized
- // (and thus - the stack size was unknown). Tune the offset having full stack
- // size in hands.
- if (StackSize || MFI->hasCalls()) {
- assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
- MBBI->getOpcode() == SystemZ::MOV64rm) &&
- "Expected to see callee-save register restore code");
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
-
- unsigned i = 0;
- MachineInstr &MI = *MBBI;
- while (!MI.getOperand(i).isImm()) {
- ++i;
- assert(i < MI.getNumOperands() && "Unexpected restore code!");
- }
-
- uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
- // If Offset does not fit into 20-bit signed displacement field we need to
- // emit some additional code...
- if (Offset > 524287) {
- // Fold the displacement into load instruction as much as possible.
- NumBytes = Offset - 524287;
- Offset = 524287;
- emitSPUpdate(MBB, MBBI, NumBytes, TII);
- }
-
- MI.getOperand(i).ChangeToImmediate(Offset);
- }
-}
-
unsigned SystemZRegisterInfo::getRARegister() const {
assert(0 && "What is the return address register");
return 0;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 5dae865..b450798 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -34,11 +34,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
- bool hasFP(const MachineFunction &MF) const;
-
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -46,13 +41,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 33be8dd..0028c85 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -190,8 +190,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32,
GR32Class::iterator
GR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG32_nofp;
else
return SystemZ_REG32;
@@ -199,8 +199,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32,
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
else
return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
@@ -237,8 +237,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
ADDR32Class::iterator
ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_ADDR32_nofp;
else
return SystemZ_ADDR32;
@@ -246,8 +246,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
ADDR32Class::iterator
ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
else
return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
@@ -284,8 +284,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
GR64Class::iterator
GR64Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG64_nofp;
else
return SystemZ_REG64;
@@ -293,8 +293,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
else
return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
@@ -331,8 +331,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
ADDR64Class::iterator
ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_ADDR64_nofp;
else
return SystemZ_ADDR64;
@@ -340,8 +340,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
ADDR64Class::iterator
ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
else
return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
@@ -368,8 +368,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
GR64PClass::iterator
GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG64P_nofp;
else
return SystemZ_REG64P;
@@ -377,8 +377,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
GR64PClass::iterator
GR64PClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
else
return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
@@ -405,8 +405,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
GR128Class::iterator
GR128Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG128_nofp;
else
return SystemZ_REG128;
@@ -414,8 +414,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
GR128Class::iterator
GR128Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
else
return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f45827b..1603899 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -30,7 +30,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
+ FrameLowering(Subtarget) {
if (getRelocationModel() == Reloc::Default)
setRelocationModel(Reloc::Static);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 6af829b..524f83d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -17,11 +17,12 @@
#include "SystemZInstrInfo.h"
#include "SystemZISelLowering.h"
+#include "SystemZFrameLowering.h"
#include "SystemZSelectionDAGInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -34,15 +35,14 @@ class SystemZTargetMachine : public LLVMTargetMachine {
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
SystemZSelectionDAGInfo TSInfo;
-
- // SystemZ does not have any call stack frame, therefore not having
- // any SystemZ specific FrameInfo class.
- TargetFrameInfo FrameInfo;
+ SystemZFrameLowering FrameLowering;
public:
SystemZTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout;}
virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
index f5c969a..0919fe4 100644
--- a/contrib/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -7,12 +7,14 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMTarget.a, which implements
-// target information.
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMTarget.a, which implements target information.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Target.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/LLVMContext.h"
@@ -20,6 +22,15 @@
using namespace llvm;
+void llvm::initializeTarget(PassRegistry &Registry) {
+ initializeTargetDataPass(Registry);
+ initializeTargetLibraryInfoPass(Registry);
+}
+
+void LLVMInitializeTarget(LLVMPassRegistryRef R) {
+ initializeTarget(*unwrap(R));
+}
+
LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
return wrap(new TargetData(StringRep));
}
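
Editor's sketch (not part of the patch): the new entry points above are meant to be called once before building a PassManager. The call site below and the use of the global pass registry accessor from llvm-c/Core.h are assumptions, shown only to illustrate how the C++ helper and the C binding line up.

// Sketch only: registering the Target passes before building a PassManager.
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Core.h"

static void registerTargetPasses() {
  // C++ path: put TargetData and TargetLibraryInfo into the global registry.
  llvm::initializeTarget(*llvm::PassRegistry::getPassRegistry());

  // Equivalent C-binding path exposed by the hunk above (assuming the global
  // registry accessor LLVMGetGlobalPassRegistry from llvm-c/Core.h).
  LLVMInitializeTarget(LLVMGetGlobalPassRegistry());
}
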
diff --git a/contrib/llvm/lib/Target/TargetAsmInfo.cpp b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 0000000..6fa5420
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,27 @@
+//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
+ TLOF = &TM.getTargetLowering()->getObjFileLowering();
+ const TargetData &TD = *TM.getTargetData();
+ IsLittleEndian = TD.isLittleEndian();
+ PointerSize = TD.getPointerSize();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
+ StackDir = TFI.getStackGrowthDirection();
+ TRI = TM.getRegisterInfo();
+ TFI.getInitialFrameState(InitialFrameState);
+}
diff --git a/contrib/llvm/lib/Target/TargetData.cpp b/contrib/llvm/lib/Target/TargetData.cpp
index f35c96d..c628df0 100644
--- a/contrib/llvm/lib/Target/TargetData.cpp
+++ b/contrib/llvm/lib/Target/TargetData.cpp
@@ -25,7 +25,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
#include <cstdlib>
@@ -34,7 +34,7 @@ using namespace llvm;
// Handle the Pass registration stuff necessary to use TargetData's.
// Register the default SparcV9 implementation...
-INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true)
char TargetData::ID = 0;
//===----------------------------------------------------------------------===//
@@ -83,7 +83,7 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
(SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
"Upper bound didn't work!");
-
+
// Multiple fields can have the same offset if any of them are zero sized.
// For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
// at the i32 element, because it is the last element at that offset. This is
@@ -131,6 +131,8 @@ static unsigned getInt(StringRef R) {
}
void TargetData::init(StringRef Desc) {
+ initializeTargetDataPass(*PassRegistry::getPassRegistry());
+
LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
@@ -153,16 +155,16 @@ void TargetData::init(StringRef Desc) {
std::pair<StringRef, StringRef> Split = Desc.split('-');
StringRef Token = Split.first;
Desc = Split.second;
-
+
if (Token.empty())
continue;
-
+
Split = Token.split(':');
StringRef Specifier = Split.first;
Token = Split.second;
-
+
assert(!Specifier.empty() && "Can't be empty here");
-
+
switch (Specifier[0]) {
case 'E':
LittleEndian = false;
@@ -197,7 +199,7 @@ void TargetData::init(StringRef Desc) {
unsigned Size = getInt(Specifier.substr(1));
Split = Token.split(':');
unsigned ABIAlign = getInt(Split.first) / 8;
-
+
Split = Split.second.split(':');
unsigned PrefAlign = getInt(Split.first) / 8;
if (PrefAlign == 0)
@@ -215,7 +217,7 @@ void TargetData::init(StringRef Desc) {
Token = Split.second;
} while (!Specifier.empty() || !Token.empty());
break;
-
+
default:
break;
}
@@ -231,7 +233,7 @@ TargetData::TargetData() : ImmutablePass(ID) {
"Tool did not specify a TargetData to use?");
}
-TargetData::TargetData(const Module *M)
+TargetData::TargetData(const Module *M)
: ImmutablePass(ID) {
init(M->getDataLayout());
}
@@ -249,14 +251,14 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
return;
}
}
-
+
Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
pref_align, bit_width));
}
-/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
/// preferred if ABIInfo = false) the target wants for the specified datatype.
-unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
uint32_t BitWidth, bool ABIInfo,
const Type *Ty) const {
// Check to see if we have an exact match and remember the best match we see.
@@ -266,18 +268,18 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
if (Alignments[i].AlignType == AlignType &&
Alignments[i].TypeBitWidth == BitWidth)
return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-
+
// The best match so far depends on what we're looking for.
- if (AlignType == INTEGER_ALIGN &&
+ if (AlignType == INTEGER_ALIGN &&
Alignments[i].AlignType == INTEGER_ALIGN) {
// The "best match" for integers is the smallest size that is larger than
// the BitWidth requested.
- if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
BestMatchIdx = i;
// However, if there isn't one that's larger, then we must use the
// largest one we have (see below)
- if (LargestInt == -1 ||
+ if (LargestInt == -1 ||
Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
LargestInt = i;
}
@@ -322,8 +324,8 @@ class StructLayoutMap : public AbstractTypeUser {
I->first->removeAbstractTypeUser(this);
LayoutInfo.erase(I);
}
-
-
+
+
/// refineAbstractType - The callback method invoked when an abstract type is
/// resolved to another type. An object must override this method to update
/// its internal state to reference NewType instead of OldType.
@@ -385,21 +387,21 @@ TargetData::~TargetData() {
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
-
+
StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
StructLayout *&SL = (*STM)[Ty];
if (SL) return SL;
- // Otherwise, create the struct layout. Because it is variable length, we
+ // Otherwise, create the struct layout. Because it is variable length, we
// malloc it, then use placement new.
int NumElts = Ty->getNumElements();
StructLayout *L =
(StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
-
+
// Set SL before calling StructLayout's ctor. The ctor could cause other
// entries to be added to TheMap, invalidating our reference.
SL = L;
-
+
new (L) StructLayout(Ty, *this);
if (Ty->isAbstract())
@@ -414,14 +416,14 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
/// avoid a dangling pointer in this cache.
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
if (!LayoutMap) return; // No cache.
-
+
static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty);
}
std::string TargetData::getStringRepresentation() const {
std::string Result;
raw_string_ostream OS(Result);
-
+
OS << (LittleEndian ? "e" : "E")
<< "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
<< ':' << PointerPrefAlign*8;
@@ -430,10 +432,10 @@ std::string TargetData::getStringRepresentation() const {
OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
<< AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
}
-
+
if (!LegalIntWidths.empty()) {
OS << "-n" << (unsigned)LegalIntWidths[0];
-
+
for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
OS << ':' << (unsigned)LegalIntWidths[i];
}
@@ -461,6 +463,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
case Type::FloatTyID:
return 32;
case Type::DoubleTyID:
+ case Type::X86_MMXTyID:
return 64;
case Type::PPC_FP128TyID:
case Type::FP128TyID:
@@ -523,6 +526,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
case Type::X86_FP80TyID:
AlignType = FLOAT_ALIGN;
break;
+ case Type::X86_MMXTyID:
case Type::VectorTyID:
AlignType = VECTOR_ALIGN;
break;
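
Editor's sketch (not part of the patch): the getAlignmentInfo hunks above keep TargetData's integer-alignment rule intact: an exact width match wins, otherwise the smallest wider entry, otherwise the widest entry available. Detached from TargetData, the rule is essentially the following standalone code.

// Standalone sketch of the integer-alignment lookup rule referenced above.
#include <vector>

struct IntAlign { unsigned Bits; unsigned ABIAlign; };

unsigned lookupIntAlign(const std::vector<IntAlign> &Table, unsigned Bits) {
  int Best = -1, Largest = -1;
  for (unsigned i = 0, e = Table.size(); i != e; ++i) {
    if (Table[i].Bits == Bits)
      return Table[i].ABIAlign;                        // exact match wins
    if (Table[i].Bits > Bits &&
        (Best == -1 || Table[i].Bits < Table[Best].Bits))
      Best = i;                                        // smallest wider entry
    if (Largest == -1 || Table[i].Bits > Table[Largest].Bits)
      Largest = i;                                     // fallback: widest entry
  }
  if (Best != -1)
    return Table[Best].ABIAlign;
  return Largest != -1 ? Table[Largest].ABIAlign : 1;
}

With entries for i8/i16/i32/i64, a 36-bit query resolves to the i64 alignment (smallest wider entry), and a 128-bit query falls back to i64 as the widest entry in the table.
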
diff --git a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
index 3631b35..a661ee9 100644
--- a/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetELFWriterInfo.cpp
@@ -17,9 +17,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
- is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
- isLittleEndian = TM.getTargetData()->isLittleEndian();
+TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) :
+ is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) {
}
TargetELFWriterInfo::~TargetELFWriterInfo() {}
diff --git a/contrib/llvm/lib/Target/TargetFrameLowering.cpp b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
new file mode 100644
index 0000000..19fd581
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Default is to do nothing.
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
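
Editor's sketch (not part of the patch): the new TargetFrameLowering base class takes over the frame-layout queries that previously sat on TargetRegisterInfo (see the TargetRegisterInfo.cpp hunk further down). A hypothetical target would subclass it roughly as below; the class name, stack parameters, and hasFP policy are made up for illustration, and the pure virtual prologue/epilogue hooks are stubbed out.

// Sketch of a target-specific frame lowering built on the interface added above.
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

namespace {
class MyTargetFrameLowering : public llvm::TargetFrameLowering {
public:
  // Stack grows down, 8-byte aligned, locals start at offset 0 from the
  // incoming stack pointer (compare SystemZ's former -160 local area offset).
  MyTargetFrameLowering()
    : llvm::TargetFrameLowering(StackGrowsDown, 8, 0) {}

  // Use a frame pointer whenever the frame contains variable-sized objects.
  virtual bool hasFP(const llvm::MachineFunction &MF) const {
    return MF.getFrameInfo()->hasVarSizedObjects();
  }

  // Prologue/epilogue emission is target-specific; stubbed here.
  virtual void emitPrologue(llvm::MachineFunction &MF) const {}
  virtual void emitEpilogue(llvm::MachineFunction &MF,
                            llvm::MachineBasicBlock &MBB) const {}
};
} // end anonymous namespace
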
diff --git a/contrib/llvm/lib/Target/TargetInstrInfo.cpp b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
index c099a7e..97f3bf6 100644
--- a/contrib/llvm/lib/Target/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetInstrInfo.cpp
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -47,9 +50,85 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
TargetInstrInfo::~TargetInstrInfo() {
}
+unsigned
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
/// insertNoop - Insert a noop into the instruction stream at the specified
/// point.
-void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
llvm_unreachable("Target didn't implement insertNoop!");
}
@@ -58,7 +137,7 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
-
+
// Conditional branch is a special case.
if (TID.isBranch() && !TID.isBarrier())
return true;
@@ -78,15 +157,15 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
/// may be overloaded in the target code to do that.
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const {
-
-
+
+
// Count the number of instructions in the asm.
bool atInsnStart = true;
unsigned Length = 0;
for (; *Str; ++Str) {
if (*Str == '\n' || *Str == MAI.getSeparatorChar())
atInsnStart = true;
- if (atInsnStart && !isspace(*Str)) {
+ if (atInsnStart && !std::isspace(*Str)) {
Length += MAI.getMaxInstLength();
atInsnStart = false;
}
@@ -94,6 +173,6 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
strlen(MAI.getCommentString())) == 0)
atInsnStart = false;
}
-
+
return Length;
}
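
Editor's sketch (not part of the patch): the latency hooks added above are intended for the schedulers. Stripped down, a dependence-edge weight might be computed as follows; the helper function and its name are illustrative only.

// Sketch: weighting a def->use edge with the itinerary-based latency queries.
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper, not part of the patch.
static int edgeLatency(const llvm::TargetInstrInfo &TII,
                       const llvm::InstrItineraryData *Itin,
                       const llvm::MachineInstr *Def, unsigned DefIdx,
                       const llvm::MachineInstr *Use, unsigned UseIdx) {
  // Per-operand latency when the itineraries describe this def/use pair.
  int Lat = TII.getOperandLatency(Itin, Def, DefIdx, Use, UseIdx);
  if (Lat >= 0)
    return Lat;
  // Otherwise fall back to the whole-instruction stage latency (>= 1).
  return TII.getInstrLatency(Itin, Def, /*PredCost=*/0);
}
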
diff --git a/contrib/llvm/lib/Target/TargetLibraryInfo.cpp b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..c8bed18
--- /dev/null
+++ b/contrib/llvm/lib/Target/TargetLibraryInfo.cpp
@@ -0,0 +1,55 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+// Register the default implementation.
+INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfo::ID = 0;
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
+
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later.
+ if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9)
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+
+}
+
+
+TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple());
+}
+
+TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T);
+}
+
+/// disableAllFunctions - This disables all builtins, which is used for options
+/// like -fno-builtin.
+void TargetLibraryInfo::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
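
Editor's sketch (not part of the patch): a transform would consult this pass before emitting a library call, and -fno-builtin maps onto disableAllFunctions(). The has() accessor that reads AvailableArray lives in the accompanying header; its exact name is assumed here.

// Sketch: gating a transform on library-function availability.
#include "llvm/Target/TargetLibraryInfo.h"

// 'TLI' would normally come from getAnalysis<TargetLibraryInfo>() in a pass.
static bool canUseMemsetPattern16(const llvm::TargetLibraryInfo &TLI) {
  return TLI.has(llvm::LibFunc::memset_pattern16);
}
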
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index dd7b532..5d34c7d 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,8 +43,8 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
StaticCtorSection = 0;
StaticDtorSection = 0;
LSDASection = 0;
- EHFrameSection = 0;
+ CommDirectiveSupportsAlignment = true;
DwarfAbbrevSection = 0;
DwarfInfoSection = 0;
DwarfLineSection = 0;
@@ -168,6 +168,12 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
switch (C->getRelocationInfo()) {
default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
+ // If the global is required to have a unique address, it can't be put
+ // into a mergable section: just drop it into the general read-only
+ // section instead.
+ if (!GVar->hasUnnamedAddr())
+ return SectionKind::getReadOnly();
+
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 705b1c0..d579d95 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -219,7 +219,9 @@ FunctionSections("ffunction-sections",
TargetMachine::TargetMachine(const Target &T)
: TheTarget(T), AsmInfo(0),
- MCRelaxAll(false) {
+ MCRelaxAll(false),
+ MCNoExecStack(false),
+ MCUseLoc(true) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
diff --git a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
index 55f222c..4811ba5 100644
--- a/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetRegisterInfo.cpp
@@ -13,10 +13,10 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -30,7 +30,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
AliasesHash(aliases), AliasesHashSize(aliasessize),
Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
RegClassBegin(RCB), RegClassEnd(RCE) {
- assert(NumRegs < FirstVirtualRegister &&
+ assert(isPhysicalRegister(NumRegs) &&
"Target has too many physical registers!");
CallFrameSetupOpcode = CFSO;
@@ -39,6 +39,25 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
TargetRegisterInfo::~TargetRegisterInfo() {}
+void PrintReg::print(raw_ostream &OS) const {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+}
+
/// getMinimalPhysRegClass - Returns the Register Class of a physical
/// register of the given type, picking the most sub register class of
/// the right type that contains this physreg.
@@ -82,29 +101,11 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
// Mask out the reserved registers
BitVector Reserved = getReservedRegs(MF);
- Allocatable ^= Reserved & Allocatable;
+ Allocatable &= Reserved.flip();
return Allocatable;
}
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
-/// getInitialFrameState - Returns a list of machine moves that are assumed
-/// on entry to a function.
-void
-TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const{
- // Default is to do nothing.
-}
-
const TargetRegisterClass *
llvm::getCommonSubClass(const TargetRegisterClass *A,
const TargetRegisterClass *B) {
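
Editor's sketch (not part of the patch): the new PrintReg helper above gives one formatting path for null, stack-slot, virtual, and physical registers. A debug-print call would look roughly like this; the wrapper function and the constructor argument order are assumptions based on the print() method shown above.

// Sketch: printing a register operand through PrintReg::print.
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

// Reg, SubIdx, and TRI are placeholders for whatever the caller has in scope.
static void dumpReg(unsigned Reg, unsigned SubIdx,
                    const llvm::TargetRegisterInfo *TRI) {
  llvm::PrintReg(Reg, TRI, SubIdx).print(llvm::errs());
  llvm::errs() << '\n';
}
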
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 26797ab..ec73087 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -65,9 +65,10 @@ public:
}
};
-}
+} // end anonymous namespace
-static unsigned MatchRegisterName(StringRef Name);
+#define GET_REGISTER_MATCHER
+#include "X86GenAsmMatcher.inc"
AsmToken X86AsmLexer::LexTokenATT() {
AsmToken lexedToken = lexDefinite();
@@ -162,7 +163,3 @@ extern "C" void LLVMInitializeX86AsmLexer() {
RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target);
RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target);
}
-
-#define REGISTERS_ONLY
-#include "X86GenAsmMatcher.inc"
-#undef REGISTERS_ONLY
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f8588d8..1cac07a 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -10,20 +10,21 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
namespace {
@@ -43,35 +44,32 @@ private:
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
-
X86Operand *ParseOperand();
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool MatchInstruction(SMLoc IDLoc,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
/// @name Auto-generated Matcher Functions
/// {
- unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
-
- bool MatchInstructionImpl(
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "X86GenAsmMatcher.inc"
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+ X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(parser), TM(TM) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(
&TM.getSubtarget<X86Subtarget>()));
}
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -81,16 +79,16 @@ public:
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, _Parser, TM) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, _Parser, TM) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, Parser, TM) {
Is64Bit = true;
}
};
@@ -375,14 +373,18 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
+ // If the match failed, try the register name as lowercase.
+ if (RegNo == 0)
+ RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+
// FIXME: This should be done using Requires<In32BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions
// can be also checked.
if (RegNo == X86::RIZ && !Is64Bit)
return Error(Tok.getLoc(), "riz register in 64-bit mode only");
- // Parse %st(1) and "%st" as "%st(0)"
- if (RegNo == 0 && Tok.getString() == "st") {
+ // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
+ if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
@@ -617,88 +619,13 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
bool X86ATTAsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // The various flavors of pushf and popf use Requires<In32BitMode> and
- // Requires<In64BitMode>, but the assembler doesn't yet implement that.
- // For now, just do a manual check to prevent silent misencoding.
- if (Is64Bit) {
- if (Name == "popfl")
- return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
- else if (Name == "pushfl")
- return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
- else if (Name == "pusha")
- return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
- } else {
- if (Name == "popfq")
- return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
- else if (Name == "pushfq")
- return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
- }
-
- // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
- // the form jrcxz is not allowed in 32-bit mode.
- if (Is64Bit) {
- if (Name == "jcxz")
- return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
- } else {
- if (Name == "jrcxz")
- return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
- }
-
- // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
- // represent alternative syntaxes in the .td file, without requiring
- // instruction duplication.
- StringRef PatchedName = StringSwitch<StringRef>(Name)
- .Case("sal", "shl")
- .Case("salb", "shlb")
- .Case("sall", "shll")
- .Case("salq", "shlq")
- .Case("salw", "shlw")
- .Case("repe", "rep")
- .Case("repz", "rep")
- .Case("repnz", "repne")
- .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
- .Case("popf", Is64Bit ? "popfq" : "popfl")
- .Case("retl", Is64Bit ? "retl" : "ret")
- .Case("retq", Is64Bit ? "ret" : "retq")
- .Case("setz", "sete")
- .Case("setnz", "setne")
- .Case("jz", "je")
- .Case("jnz", "jne")
- .Case("jc", "jb")
- // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
- // jecxz requires an AdSize prefix but jecxz does not have a prefix in
- // 32-bit mode.
- .Case("jecxz", "jcxz")
- .Case("jrcxz", "jcxz")
- .Case("jna", "jbe")
- .Case("jnae", "jb")
- .Case("jnb", "jae")
- .Case("jnbe", "ja")
- .Case("jnc", "jae")
- .Case("jng", "jle")
- .Case("jnge", "jl")
- .Case("jnl", "jge")
- .Case("jnle", "jg")
- .Case("jpe", "jp")
- .Case("jpo", "jnp")
- .Case("cmovcl", "cmovbl")
- .Case("cmovcl", "cmovbl")
- .Case("cmovnal", "cmovbel")
- .Case("cmovnbl", "cmovael")
- .Case("cmovnbel", "cmoval")
- .Case("cmovncl", "cmovael")
- .Case("cmovngl", "cmovlel")
- .Case("cmovnl", "cmovgel")
- .Case("cmovngl", "cmovlel")
- .Case("cmovngel", "cmovll")
- .Case("cmovnll", "cmovgel")
- .Case("cmovnlel", "cmovgl")
- .Case("cmovnzl", "cmovnel")
- .Case("cmovzl", "cmovel")
- .Case("fwait", "wait")
- .Case("movzx", "movzb")
- .Default(Name);
+ StringRef PatchedName = Name;
+ // FIXME: Hack to recognize setneb as setne.
+ if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
+ PatchedName != "setb" && PatchedName != "setnb")
+ PatchedName = PatchedName.substr(0, Name.size()-1);
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
@@ -773,12 +700,26 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
PatchedName = "vpclmulqdq";
}
}
+
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ // Determine whether this is an instruction prefix.
+ bool isPrefix =
+ Name == "lock" || Name == "rep" ||
+ Name == "repe" || Name == "repz" ||
+ Name == "repne" || Name == "repnz" ||
+ Name == "rex64" || Name == "data16";
+
+
+ // This does the actual operand parsing. Don't parse any more if we have a
+ // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
+ // just want to parse the "lock" as the first instruction and the "incl" as
+ // the next one.
+ if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
// Parse '*' modifier.
if (getLexer().is(AsmToken::Star)) {
@@ -790,8 +731,10 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Read the first operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -799,23 +742,27 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Parse and remember the operand.
if (X86Operand *Op = ParseOperand())
Operands.push_back(Op);
- else
+ else {
+ Parser.EatToEndOfStatement();
return true;
+ }
}
- }
- // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
- if ((Name.startswith("shr") || Name.startswith("sar") ||
- Name.startswith("shl")) &&
- Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[1])->isImm() &&
- isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
- cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.EatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
}
- // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex(); // Consume the EndOfStatement
+ else if (isPrefix && getLexer().is(AsmToken::Slash))
+ Parser.Lex(); // Consume the prefix separator Slash
+
+ // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
+ // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
+ // documented form in various unofficial manuals, so a lot of code uses it.
if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
Operands.size() == 3) {
X86Operand &Op = *(X86Operand*)Operands.back();
@@ -829,76 +776,80 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
- // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
- // "f{mul*,add*,sub*,div*} $op"
- if ((Name.startswith("fmul") || Name.startswith("fadd") ||
- Name.startswith("fsub") || Name.startswith("fdiv")) &&
- Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[2])->isReg() &&
- static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
- delete Operands[2];
- Operands.erase(Operands.begin() + 2);
- }
-
- // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
- // B".
- if (Name.startswith("imul") && Operands.size() == 3 &&
- static_cast<X86Operand*>(Operands[1])->isImm() &&
- static_cast<X86Operand*>(Operands.back())->isReg()) {
- X86Operand *Op = static_cast<X86Operand*>(Operands.back());
- Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
- Op->getEndLoc()));
- }
-
- return false;
-}
-
-bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
- StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal == ".word")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- return true;
-}
-
-/// ParseDirectiveWord
-/// ::= .word [ expression (, expression)* ]
-bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().ParseExpression(Value))
- return true;
-
- getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
- Parser.Lex();
+ // FIXME: Hack to handle recognizing s{hr,ar,hl} $1, <op>. Canonicalize to
+ // "shift <op>".
+ if ((Name.startswith("shr") || Name.startswith("sar") ||
+ Name.startswith("shl") || Name.startswith("sal") ||
+ Name.startswith("rcl") || Name.startswith("rcr") ||
+ Name.startswith("rol") || Name.startswith("ror")) &&
+ Operands.size() == 3) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
}
}
- Parser.Lex();
return false;
}
-
-bool
-X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
- const SmallVectorImpl<MCParsedAsmOperand*>
- &Operands,
- MCInst &Inst) {
+bool X86ATTAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
assert(!Operands.empty() && "Unexpect empty operand list!");
-
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ // First, handle aliases that expand to multiple instructions.
+ // FIXME: This should be replaced with a real .td file alias mechanism.
+ // Also, MatchInstructionImpl should actually *do* the EmitInstruction
+ // call.
+ if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
+ Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
+ Op->getToken() == "finit" || Op->getToken() == "fsave" ||
+ Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ MCInst Inst;
+ Inst.setOpcode(X86::WAIT);
+ Out.EmitInstruction(Inst);
+
+ const char *Repl =
+ StringSwitch<const char*>(Op->getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(0);
+ assert(Repl && "Unknown wait-prefixed instruction");
+ delete Operands[0];
+ Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
+ }
+
+ bool WasOriginallyInvalidOperand = false;
+ unsigned OrigErrorInfo;
+ MCInst Inst;
+
// First, try a direct match.
- if (!MatchInstructionImpl(Operands, Inst))
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ case Match_Success:
+ Out.EmitInstruction(Inst);
return false;
+ case Match_MissingFeature:
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ case Match_ConversionFail:
+ return Error(IDLoc, "unable to convert operands to instruction");
+ case Match_InvalidOperand:
+ WasOriginallyInvalidOperand = true;
+ break;
+ case Match_MnemonicFail:
+ break;
+ }
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
@@ -912,15 +863,26 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
Tmp += ' ';
Op->setTokenValue(Tmp.str());
+ // If this instruction starts with an 'f', then it is a floating point stack
+ // instruction. These come in up to three forms for 32-bit, 64-bit, and
+ // 80-bit floating point, which use the suffixes s,l,t respectively.
+ //
+ // Otherwise, we assume that this may be an integer instruction, which comes
+ // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
+ const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
+
// Check for the various suffix matches.
- Tmp[Base.size()] = 'b';
- bool MatchB = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'w';
- bool MatchW = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'l';
- bool MatchL = MatchInstructionImpl(Operands, Inst);
- Tmp[Base.size()] = 'q';
- bool MatchQ = MatchInstructionImpl(Operands, Inst);
+ Tmp[Base.size()] = Suffixes[0];
+ unsigned ErrorInfoIgnore;
+ MatchResultTy Match1, Match2, Match3, Match4;
+
+ Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[1];
+ Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[2];
+ Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
+ Tmp[Base.size()] = Suffixes[3];
+ Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore);
// Restore the old token.
Op->setTokenValue(Base);
@@ -928,24 +890,25 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL + MatchQ == 3)
+ unsigned NumSuccessfulMatches =
+ (Match1 == Match_Success) + (Match2 == Match_Success) +
+ (Match3 == Match_Success) + (Match4 == Match_Success);
+ if (NumSuccessfulMatches == 1) {
+ Out.EmitInstruction(Inst);
return false;
+ }
- // Otherwise, the match failed.
+ // Otherwise, the match failed, try to produce a decent error message.
// If we had multiple suffix matches, then identify this as an ambiguous
// match.
- if (MatchB + MatchW + MatchL + MatchQ != 4) {
+ if (NumSuccessfulMatches > 1) {
char MatchChars[4];
unsigned NumMatches = 0;
- if (!MatchB)
- MatchChars[NumMatches++] = 'b';
- if (!MatchW)
- MatchChars[NumMatches++] = 'w';
- if (!MatchL)
- MatchChars[NumMatches++] = 'l';
- if (!MatchQ)
- MatchChars[NumMatches++] = 'q';
+ if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
+ if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
+ if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
+ if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
@@ -959,14 +922,90 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
}
OS << ")";
Error(IDLoc, OS.str());
- } else {
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
+ return true;
}
+ // Okay, we know that none of the variants matched successfully.
+
+ // If all of the instructions reported an invalid mnemonic, then the original
+ // mnemonic was invalid.
+ if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
+ (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
+ if (!WasOriginallyInvalidOperand) {
+ Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
+ return true;
+ }
+
+ // Recover location info for the operand if we know which was the problem.
+ SMLoc ErrorLoc = IDLoc;
+ if (OrigErrorInfo != ~0U) {
+ if (OrigErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ // If one instruction matched with a missing feature, report this as a
+ // missing feature.
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
+ (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // If one instruction matched with an invalid operand, report this as an
+ // operand failure.
+ if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
+ (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
+ Error(IDLoc, "invalid operand for instruction");
+ return true;
+ }
+
+ // If all of these were an outright failure, report it in a useless way.
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
+ return true;
+}
+
+
+bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
return true;
}
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+
+
extern "C" void LLVMInitializeX86AsmLexer();
@@ -977,4 +1016,6 @@ extern "C" void LLVMInitializeX86AsmParser() {
LLVMInitializeX86AsmLexer();
}
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
#include "X86GenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 09f1584..691e2d7 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -157,9 +157,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param immediate - The immediate value to append.
/// @param operand - The operand, as stored in the descriptor table.
/// @param insn - The internal instruction.
-static void translateImmediate(MCInst &mcInst,
- uint64_t immediate,
- OperandSpecifier &operand,
+static void translateImmediate(MCInst &mcInst, uint64_t immediate,
+ const OperandSpecifier &operand,
InternalInstruction &insn) {
// Sign-extend the immediate if necessary.
@@ -392,9 +391,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
/// @return - 0 on success; nonzero otherwise
-static bool translateRM(MCInst &mcInst,
- OperandSpecifier &operand,
- InternalInstruction &insn) {
+static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@@ -461,9 +459,8 @@ static bool translateFPRegister(MCInst &mcInst,
/// @param operand - The operand, as stored in the descriptor table.
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
-static bool translateOperand(MCInst &mcInst,
- OperandSpecifier &operand,
- InternalInstruction &insn) {
+static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
+ InternalInstruction &insn) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
index 9c54262..550cf9d 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -78,7 +78,7 @@
const char* name;
#define INSTRUCTION_IDS \
- InstrUID* instructionIDs;
+ const InstrUID *instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 6c3ff6b..b6546fc 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -27,12 +27,6 @@
typedef int8_t bool;
-#ifdef __GNUC__
-#define NORETURN __attribute__((noreturn))
-#else
-#define NORETURN
-#endif
-
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
@@ -103,7 +97,7 @@ static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- struct ModRMDecision* dec;
+ const struct ModRMDecision* dec;
switch (type) {
default:
@@ -147,7 +141,7 @@ static InstrUID decode(OpcodeType type,
* decode(); specifierForUID will not check bounds.
* @return - A pointer to the specification for that instruction.
*/
-static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
return &INSTRUCTIONS_SYM[uid];
}
@@ -296,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL isPrefix = TRUE;
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
- uint8_t byte;
+ uint8_t byte = 0;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
@@ -394,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
} else {
unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
if (insn->mode == MODE_16BIT) {
@@ -405,7 +400,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 2 : 4);
insn->displacementSize = (hasAdSize ? 2 : 4);
- insn->immediateSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
} else if (insn->mode == MODE_64BIT) {
if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
insn->registerSize = 8;
@@ -517,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID,
insn->opcode);
if (hasModRMExtension) {
- readModRM(insn);
+ if (readModRM(insn))
+ return -1;
*instructionID = decode(insn->opcodeType,
instructionClass,
@@ -632,9 +628,9 @@ static int getID(struct InternalInstruction* insn) {
* instead of F2 changes a 32 to a 64, we adopt the new encoding.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithREXw;
- struct InstructionSpecifier* specWithREXw;
+ const struct InstructionSpecifier *specWithREXw;
spec = specifierForUID(instructionID);
@@ -672,9 +668,9 @@ static int getID(struct InternalInstruction* insn) {
* in the right place we check if there's a 16-bit operation.
*/
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- struct InstructionSpecifier* specWithOpsize;
+ const struct InstructionSpecifier *specWithOpsize;
spec = specifierForUID(instructionID);
@@ -866,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) {
if (insn->consumedModRM)
return 0;
- consumeByte(insn, &insn->modRM);
+ if (consumeByte(insn, &insn->modRM))
+ return -1;
insn->consumedModRM = TRUE;
mod = modFromModRM(insn->modRM);
@@ -1067,7 +1064,7 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
* invalid for its class.
*/
static int fixupReg(struct InternalInstruction *insn,
- struct OperandSpecifier *op) {
+ const struct OperandSpecifier *op) {
uint8_t valid;
dbgprintf(insn, "fixupReg()");
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 28ba86b..4f4fbcd 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -24,7 +24,7 @@ extern "C" {
const char* name;
#define INSTRUCTION_IDS \
- InstrUID* instructionIDs;
+ const InstrUID *instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -423,7 +423,7 @@ struct InternalInstruction {
/* The instruction ID, extracted from the decode table */
uint16_t instructionID;
/* The specifier for the instruction, from the instruction info table */
- struct InstructionSpecifier* spec;
+ const struct InstructionSpecifier *spec;
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 0f33f52..1425b86 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -22,7 +22,7 @@
#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
@@ -248,6 +248,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_M64, "8-byte") \
ENUM_ENTRY(TYPE_LEA, "Effective address") \
ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..033973e
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86AsmPrinter
+ X86ATTInstPrinter.cpp
+ X86IntelInstPrinter.cpp
+ X86InstComments.cpp
+ )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/Makefile b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
new file mode 100644
index 0000000..c82aa33
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 554b96c..d6950f4 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -25,10 +25,8 @@
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter.inc"
-#undef MachineInstr
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index eb98664..eb98664 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index da9d5a3..12144e3 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -16,7 +16,7 @@
#include "X86GenInstrNames.inc"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-#include "../X86ShuffleDecode.h"
+#include "../Utils/X86ShuffleDecode.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
index 6b86db4..6b86db4 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86InstComments.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 5625b0e..0484529 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -21,13 +21,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "X86GenInstrNames.inc"
+#include <cctype>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
-#undef MachineInstr
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
diff --git a/contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6f12032..6f12032 100644
--- a/contrib/llvm/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
diff --git a/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
new file mode 100644
index 0000000..3ad5f99
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMX86Utils
+ X86ShuffleDecode.cpp
+ )
+add_dependencies(LLVMX86Utils X86CodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/X86/Utils/Makefile b/contrib/llvm/lib/Target/X86/Utils/Makefile
new file mode 100644
index 0000000..1df6f0f
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Utils
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index df04052..1287977 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -12,21 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef X86_SHUFFLE_DECODE_H
-#define X86_SHUFFLE_DECODE_H
-
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
+#include "X86ShuffleDecode.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
-enum {
- SM_SentinelZero = ~0U
-};
+namespace llvm {
-static inline
void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
// Default to copying the dest value.
ShuffleMask.push_back(0);
@@ -51,8 +44,8 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-static void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -61,8 +54,8 @@ static void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-static void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -70,16 +63,16 @@ static void DecodeMOVLHPSMask(unsigned NElts,
ShuffleMask.push_back(NElts+i);
}
-static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts; ++i) {
ShuffleMask.push_back(Imm % NElts);
Imm /= NElts;
}
}
-static void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -90,8 +83,8 @@ static void DecodePSHUFHWMask(unsigned Imm,
}
}
-static void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -102,24 +95,24 @@ static void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-static void DecodePUNPCKLMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i);
ShuffleMask.push_back(i+NElts);
}
}
-static void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i+NElts/2);
ShuffleMask.push_back(i+NElts+NElts/2);
}
}
-static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
// Part that reads from dest.
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(Imm % NElts);
@@ -132,8 +125,8 @@ static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
}
}
-static void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i+NElts/2); // Reads from dest
ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
@@ -144,12 +137,12 @@ static void DecodeUNPCKHPMask(unsigned NElts,
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. NElts indicates the number of elements in the vector allowing it to
/// handle different datatypes and vector widths.
-static void DecodeUNPCKLPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i) {
ShuffleMask.push_back(i); // Reads from dest
ShuffleMask.push_back(i+NElts); // Reads from src
}
}
-#endif
+} // llvm namespace
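The decoders moved into this new Utils library turn an x86 shuffle immediate into a generic vector of element indices. As a quick standalone sanity check of the PSHUF logic above (a sketch that assumes the new Utils/X86ShuffleDecode.h header and LLVM's ADT headers are on the include path), the immediate 0x1b decodes to the reversed mask <3,2,1,0>:

// Standalone sketch: decode a pshufd immediate with the helper above.
// Assumes the X86ShuffleDecode.h header introduced in this change.
#include "X86ShuffleDecode.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<unsigned, 8> Mask;
  // pshufd $0x1b: field i of the immediate selects the source element for
  // destination element i, so 0b00'01'10'11 yields indices 3, 2, 1, 0.
  llvm::DecodePSHUFMask(/*NElts=*/4, /*Imm=*/0x1b, Mask);
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    std::printf("%u ", Mask[i]);   // prints: 3 2 1 0
  std::printf("\n");
  return 0;
}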
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
new file mode 100644
index 0000000..50d9ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -0,0 +1,69 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+enum {
+ SM_SentinelZero = ~0U
+};
+
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <3,1> or <6,7,2,3>
+void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+// <0,2> or <0,1,4,5>
+void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+} // llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 27e8850..0ca4366 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -23,11 +24,13 @@ class FunctionPass;
class JITCodeEmitter;
class MCCodeEmitter;
class MCContext;
+class MCObjectWriter;
class MachineCodeEmitter;
class Target;
class TargetAsmBackend;
class X86TargetMachine;
class formatted_raw_ostream;
+class raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
@@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory();
///
FunctionPass *createX86MaxStackAlignmentHeuristicPass();
+
+/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index a19f1ac..efb6c8c 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
@@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41]>;
+ [FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
- "Support SSE 4a instructions">;
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
@@ -112,11 +116,13 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem, FeatureAES]>;
-// Sandy Bridge does not have FMA
-// FIXME: Wikipedia says it does... it should have AES as well.
-def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
+def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
+// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
+// rather than a superset.
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
+ FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
@@ -176,7 +182,7 @@ include "X86CallingConv.td"
//===----------------------------------------------------------------------===//
-// Assembly Printers
+// Assembly Parser
//===----------------------------------------------------------------------===//
// Currently the X86 assembly parser only supports ATT syntax.
@@ -191,15 +197,21 @@ def ATTAsmParser : AsmParser {
string RegisterPrefix = "%";
}
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel}
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
+ bit isMCAsmWriter = 1;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
+ bit isMCAsmWriter = 1;
}
def X86 : Target {
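The feature changes above make POPCNT an explicit subtarget feature that SSE4.2 and SSE4A now pull in, the westmere and sandybridge processor entries pick up AES/CLMUL, and AVX stays disabled on sandybridge for now. Features listed in the implied-features operand of a SubtargetFeature are pulled in transitively, so enabling sse42 also enables sse41, ssse3, and now popcnt. A toy sketch of that implication semantics (illustrative only, not LLVM's real feature resolver):

// Toy sketch of transitive feature implication, mirroring how FeatureSSE42
// above now pulls in FeaturePOPCNT. Not LLVM's actual API.
#include <map>
#include <set>
#include <string>
#include <vector>

using ImpliesMap = std::map<std::string, std::vector<std::string>>;

static void enableFeature(const std::string &F, const ImpliesMap &Implies,
                          std::set<std::string> &Enabled) {
  if (!Enabled.insert(F).second)
    return;                                   // already enabled
  auto It = Implies.find(F);
  if (It == Implies.end())
    return;
  for (const std::string &Dep : It->second)
    enableFeature(Dep, Implies, Enabled);     // implied features, transitively
}

int main() {
  ImpliesMap Implies;
  Implies["sse42"] = {"sse41", "popcnt"};
  Implies["sse41"] = {"ssse3"};
  std::set<std::string> Enabled;
  enableFeature("sse42", Implies, Enabled);   // sse42, sse41, ssse3, popcnt
  return Enabled.count("popcnt") == 1 ? 0 : 1;
}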
diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
index 69dc967..da5f5b1 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
@@ -11,50 +11,83 @@
#include "X86.h"
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/ELFObjectWriter.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
- case X86::reloc_pcrel_1byte:
+ case FK_PCRel_1:
case FK_Data_1: return 0;
- case X86::reloc_pcrel_2byte:
+ case FK_PCRel_2:
case FK_Data_2: return 1;
- case X86::reloc_pcrel_4byte:
+ case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case X86::reloc_global_offset_table:
case FK_Data_4: return 2;
+ case FK_PCRel_8:
case FK_Data_8: return 3;
}
}
namespace {
+
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+};
+
class X86AsmBackend : public TargetAsmBackend {
public:
X86AsmBackend(const Target &T)
- : TargetAsmBackend(T) {}
+ : TargetAsmBackend() {}
+
+ unsigned getNumFixupKinds() const {
+ return X86::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
+ { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
+ { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_global_offset_table", 0, 4 * 8, 0}
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return TargetAsmBackend::getFixupKindInfo(Kind);
- void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+ }
+
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DataSize &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+ Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
bool MayNeedRelaxation(const MCInst &Inst) const;
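The new ApplyFixup signature writes straight into the fragment's byte buffer instead of going through MCDataFragment, patching the fixup value least-significant byte first. For example, a 4-byte fixup with value 0x11223344 at offset 2 overwrites bytes 2..5 with 44 33 22 11. A standalone sketch of that loop:

// Standalone sketch of the little-endian patching done by ApplyFixup above.
#include <cassert>
#include <cstdint>
#include <cstdio>

static void patchLE(char *Data, unsigned DataSize, unsigned Offset,
                    unsigned Size, uint64_t Value) {
  assert(Offset + Size <= DataSize && "Invalid fixup offset!");
  for (unsigned i = 0; i != Size; ++i)
    Data[Offset + i] = char(uint8_t(Value >> (i * 8)));   // LSB first
}

int main() {
  char Buf[8] = {};
  patchLE(Buf, sizeof(Buf), /*Offset=*/2, /*Size=*/4, 0x11223344);
  for (unsigned i = 0; i != sizeof(Buf); ++i)
    std::printf("%02x ", (unsigned char)Buf[i]);
  std::printf("\n");               // prints: 00 00 44 33 22 11 00 00
  return 0;
}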
@@ -63,9 +96,9 @@ public:
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
-} // end anonymous namespace
+} // end anonymous namespace
-static unsigned getRelaxedOpcode(unsigned Op) {
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
switch (Op) {
default:
return Op;
@@ -90,16 +123,104 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ // IMUL
+ case X86::IMUL16rri8: return X86::IMUL16rri;
+ case X86::IMUL16rmi8: return X86::IMUL16rmi;
+ case X86::IMUL32rri8: return X86::IMUL32rri;
+ case X86::IMUL32rmi8: return X86::IMUL32rmi;
+ case X86::IMUL64rri8: return X86::IMUL64rri32;
+ case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+ // AND
+ case X86::AND16ri8: return X86::AND16ri;
+ case X86::AND16mi8: return X86::AND16mi;
+ case X86::AND32ri8: return X86::AND32ri;
+ case X86::AND32mi8: return X86::AND32mi;
+ case X86::AND64ri8: return X86::AND64ri32;
+ case X86::AND64mi8: return X86::AND64mi32;
+
+ // OR
+ case X86::OR16ri8: return X86::OR16ri;
+ case X86::OR16mi8: return X86::OR16mi;
+ case X86::OR32ri8: return X86::OR32ri;
+ case X86::OR32mi8: return X86::OR32mi;
+ case X86::OR64ri8: return X86::OR64ri32;
+ case X86::OR64mi8: return X86::OR64mi32;
+
+ // XOR
+ case X86::XOR16ri8: return X86::XOR16ri;
+ case X86::XOR16mi8: return X86::XOR16mi;
+ case X86::XOR32ri8: return X86::XOR32ri;
+ case X86::XOR32mi8: return X86::XOR32mi;
+ case X86::XOR64ri8: return X86::XOR64ri32;
+ case X86::XOR64mi8: return X86::XOR64mi32;
+
+ // ADD
+ case X86::ADD16ri8: return X86::ADD16ri;
+ case X86::ADD16mi8: return X86::ADD16mi;
+ case X86::ADD32ri8: return X86::ADD32ri;
+ case X86::ADD32mi8: return X86::ADD32mi;
+ case X86::ADD64ri8: return X86::ADD64ri32;
+ case X86::ADD64mi8: return X86::ADD64mi32;
+
+ // SUB
+ case X86::SUB16ri8: return X86::SUB16ri;
+ case X86::SUB16mi8: return X86::SUB16mi;
+ case X86::SUB32ri8: return X86::SUB32ri;
+ case X86::SUB32mi8: return X86::SUB32mi;
+ case X86::SUB64ri8: return X86::SUB64ri32;
+ case X86::SUB64mi8: return X86::SUB64mi32;
+
+ // CMP
+ case X86::CMP16ri8: return X86::CMP16ri;
+ case X86::CMP16mi8: return X86::CMP16mi;
+ case X86::CMP32ri8: return X86::CMP32ri;
+ case X86::CMP32mi8: return X86::CMP32mi;
+ case X86::CMP64ri8: return X86::CMP64ri32;
+ case X86::CMP64mi8: return X86::CMP64mi32;
+
+ // PUSH
+ case X86::PUSHi8: return X86::PUSHi32;
+ }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+ unsigned R = getRelaxedOpcodeArith(Op);
+ if (R != Op)
+ return R;
+ return getRelaxedOpcodeBranch(Op);
+}
+
bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Branches can always be relaxed.
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
+
// Check if this instruction is ever relaxable.
- if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
- // If so, just assume it can be relaxed. Once we support relaxing more complex
- // instructions we should check that the instruction actually has symbolic
- // operands before doing this, but we need to be careful about things like
- // PCrel.
- return true;
+
+ // Check if it has an expression and is not RIP relative.
+ bool hasExp = false;
+ bool hasRIP = false;
+ for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+ const MCOperand &Op = Inst.getOperand(i);
+ if (Op.isExpr())
+ hasExp = true;
+
+ if (Op.isReg() && Op.getReg() == X86::RIP)
+ hasRIP = true;
+ }
+
+ // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+ // how we do relaxations?
+ return hasExp && !hasRIP;
}
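MayNeedRelaxation now distinguishes branches, which can always be widened, from the arithmetic forms above, which only need widening when they carry a symbolic immediate whose final value might not fit in a signed 8-bit field (and which are left alone when RIP-relative). A minimal sketch of the imm8 range check that motivates the ri8-to-ri32 table:

// Minimal sketch: the 8-bit immediate forms (ADD32ri8, CMP64ri8, ...) only
// encode values that fit in a signed byte; anything else, or a value that is
// still a relocatable expression at layout time, needs the 32-bit form.
#include <cassert>
#include <cstdint>

static bool fitsInSignedImm8(int64_t V) {
  return V >= -128 && V <= 127;
}

int main() {
  assert(fitsInSignedImm8(100));    // addl $100, %eax keeps the short form
  assert(!fitsInSignedImm8(300));   // addl $300, %eax needs ADD32ri
  // addl $sym, %eax: the value is unknown until layout, so the assembler
  // keeps the instruction relaxable and may widen it later.
  return 0;
}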
// FIXME: Can tblgen help at all here to verify there aren't other instructions
@@ -123,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// WriteNopData - Write optimal nops to the output file for the \arg Count
/// bytes. This returns the number of bytes written. It may return 0 if
/// the \arg Count is more than the maximum optimal nops.
-///
-/// FIXME this is X86 32-bit specific and should move to a better place.
bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
- static const uint8_t Nops[16][16] = {
+ static const uint8_t Nops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@@ -147,32 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
// nopw %cs:0L(%[re]ax,%[re]ax,1)
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopl 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopl 0L(%[re]ax) */
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
};
// Write an optimal sequence for the first 15 bytes.
- uint64_t OptimalCount = (Count < 16) ? Count : 15;
- for (uint64_t i = 0, e = OptimalCount; i != e; i++)
- OW->Write8(Nops[OptimalCount - 1][i]);
+ const uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10;
+ for (uint64_t i = 0, e = Prefixes; i != e; i++)
+ OW->Write8(0x66);
+ const uint64_t Rest = OptimalCount - Prefixes;
+ for (uint64_t i = 0, e = Rest; i != e; i++)
+ OW->Write8(Nops[Rest - 1][i]);
// Finish with single byte nops.
for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
@@ -186,75 +289,60 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
- ELFX86AsmBackend(const Target &T)
- : X86AsmBackend(T) {
- HasAbsolutizedSet = true;
- HasScatteredSymbols = true;
+ Triple::OSType OSType;
+ ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
+ : X86AsmBackend(T), OSType(_OSType) {
+ HasReliableSymbolDifference = true;
}
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
- return SE.getType() == MCSectionELF::SHT_NOBITS;;
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+ const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section);
+ return ES.getFlags() & ELF::SHF_MERGE;
}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_32AsmBackend(const Target &T)
- : ELFX86AsmBackend(T) {}
+ ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new ELFObjectWriter(OS, /*Is64Bit=*/false,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/false);
+ return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
+ ELF::EM_386, false),
+ OS, /*IsLittleEndian*/ true);
}
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_64AsmBackend(const Target &T)
- : ELFX86AsmBackend(T) {}
+ ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
+ : ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new ELFObjectWriter(OS, /*Is64Bit=*/true,
- /*IsLittleEndian=*/true,
- /*HasRelocationAddend=*/true);
+ return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
+ ELF::EM_X86_64, true),
+ OS, /*IsLittleEndian*/ true);
}
};
class WindowsX86AsmBackend : public X86AsmBackend {
bool Is64Bit;
+
public:
WindowsX86AsmBackend(const Target &T, bool is64Bit)
: X86AsmBackend(T)
, Is64Bit(is64Bit) {
- HasScatteredSymbols = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createWinCOFFObjectWriter(OS, Is64Bit);
}
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
- return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
- }
};
class DarwinX86AsmBackend : public X86AsmBackend {
public:
DarwinX86AsmBackend(const Target &T)
- : X86AsmBackend(T) {
- HasAbsolutizedSet = true;
- HasScatteredSymbols = true;
- }
-
- bool isVirtualSection(const MCSection &Section) const {
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
- return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
- }
+ : X86AsmBackend(T) { }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
@@ -263,7 +351,9 @@ public:
: DarwinX86AsmBackend(T) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new MachObjectWriter(OS, /*Is64Bit=*/false);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_i386,
+ object::mach::CSX86_ALL);
}
};
@@ -275,7 +365,9 @@ public:
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return new MachObjectWriter(OS, /*Is64Bit=*/true);
+ return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
+ object::mach::CTM_x86_64,
+ object::mach::CSX86_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -312,7 +404,7 @@ public:
}
};
-} // end anonymous namespace
+} // end anonymous namespace
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
@@ -322,9 +414,12 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, false);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_32AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, false);
default:
- return new ELFX86_32AsmBackend(T);
+ return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
}
}
@@ -333,11 +428,14 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW64:
+ case Triple::MinGW32:
case Triple::Cygwin:
case Triple::Win32:
- return new WindowsX86AsmBackend(T, true);
+ if (Triple(TT).getEnvironment() == Triple::MachO)
+ return new DarwinX86_64AsmBackend(T);
+ else
+ return new WindowsX86AsmBackend(T, true);
default:
- return new ELFX86_64AsmBackend(T);
+ return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
}
}
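One consequence of the WriteNopData rewrite earlier in this file is that padding longer than ten bytes is now built from 0x66 operand-size prefixes in front of the longest ten-byte nop, rather than the removed multi-instruction table entries; anything beyond fifteen bytes is still finished with single-byte nops. A standalone sketch of that split:

// Standalone sketch of the prefix-plus-nop split used by the new WriteNopData.
#include <cstdint>
#include <cstdio>

static void nopSplit(uint64_t Count) {
  const uint64_t Optimal  = Count < 16 ? Count : 15;    // longest single sequence
  const uint64_t Prefixes = Optimal <= 10 ? 0 : Optimal - 10;
  const uint64_t Rest     = Optimal - Prefixes;         // bytes of the base nop
  std::printf("%llu bytes -> %llu x 0x66 prefix + %llu-byte nop\n",
              (unsigned long long)Count, (unsigned long long)Prefixes,
              (unsigned long long)Rest);
}

int main() {
  nopSplit(7);    // 7 bytes  -> 0 prefixes + a 7-byte nop
  nopSplit(15);   // 15 bytes -> 5 prefixes + the 10-byte nopw sequence
  return 0;
}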
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 20110ad..99b4479 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "AsmPrinter/X86ATTInstPrinter.h"
-#include "AsmPrinter/X86IntelInstPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
@@ -48,21 +48,15 @@ using namespace llvm;
// Primitive Helper Functions.
//===----------------------------------------------------------------------===//
-void X86AsmPrinter::PrintPICBaseSymbol(raw_ostream &O) const {
- const TargetLowering *TLI = TM.getTargetLowering();
- O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF,
- OutContext);
-}
-
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- if (Subtarget->isTargetCOFF()) {
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
@@ -95,7 +89,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
-
+
MCSymbol *GVSym;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB)
GVSym = GetSymbolWithGlobalValueBase(GV, "$stub");
@@ -109,11 +103,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName());
-
+
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
+ MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
@@ -133,7 +127,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
StubSym = MachineModuleInfoImpl::
StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
}
-
+
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
@@ -149,7 +143,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
SmallString<128> TempNameStr;
TempNameStr += StringRef(MO.getSymbolName());
TempNameStr += StringRef("$stub");
-
+
MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
@@ -163,17 +157,17 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
} else {
SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
}
-
+
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
- if (SymToPrint->getName()[0] != '$')
+ if (SymToPrint->getName()[0] != '$')
O << *SymToPrint;
else
O << '(' << *SymToPrint << '(';
break;
}
}
-
+
switch (MO.getTargetFlags()) {
default:
llvm_unreachable("Unknown target flag on GV operand");
@@ -185,15 +179,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
// These affect the name of the symbol, not any suffix.
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
- O << " + [.-";
- PrintPICBaseSymbol(O);
- O << ']';
- break;
+ O << " + [.-" << *MF->getPICBaseSymbol() << ']';
+ break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
- O << '-';
- PrintPICBaseSymbol(O);
+ O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
@@ -206,8 +197,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_PLT: O << "@PLT"; break;
case X86II::MO_TLVP: O << "@TLVP"; break;
case X86II::MO_TLVP_PIC_BASE:
- O << "@TLVP" << '-';
- PrintPICBaseSymbol(O);
+ O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
}
}
@@ -262,7 +252,7 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol: {
O << '$';
printSymbolOperand(MO, O);
@@ -298,10 +288,10 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") &&
BaseReg.getReg() == X86::RIP)
HasBaseReg = false;
-
+
// HasParenPart - True if we will print out the () part of the mem ref.
bool HasParenPart = IndexReg.getReg() || HasBaseReg;
-
+
if (DispSpec.isImm()) {
int DispVal = DispSpec.getImm();
if (DispVal || !HasParenPart)
@@ -312,6 +302,9 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
printSymbolOperand(MI->getOperand(Op+3), O);
}
+ if (Modifier && strcmp(Modifier, "H") == 0)
+ O << "+8";
+
if (HasParenPart) {
assert(IndexReg.getReg() != X86::ESP &&
"X86 doesn't allow scaling by ESP");
@@ -344,10 +337,8 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op,
raw_ostream &O) {
- PrintPICBaseSymbol(O);
- O << '\n';
- PrintPICBaseSymbol(O);
- O << ':';
+ O << *MF->getPICBaseSymbol() << '\n';
+ O << *MF->getPICBaseSymbol() << ':';
}
bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
@@ -386,14 +377,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
if (ExtraCode[1] != 0) return true; // Unknown modifier.
const MachineOperand &MO = MI->getOperand(OpNo);
-
+
switch (ExtraCode[0]) {
default: return true; // Unknown modifier.
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
if (MO.isImm()) {
O << MO.getImm();
return false;
- }
+ }
if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
printSymbolOperand(MO, O);
if (Subtarget->isPICStyleRIPRel())
@@ -470,6 +461,9 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
case 'q': // Print SImode register
// These only apply to registers, ignore on mem.
break;
+ case 'H':
+ printMemReference(MI, OpNo, O, "H");
+ return false;
case 'P': // Don't print @PLT, but do print as memory.
printMemReference(MI, OpNo, O, "no-rip");
return false;
@@ -480,23 +474,23 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin())
+ if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
}
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetEnvMacho()) {
// All darwin targets use mach-o.
MachineModuleInfoMachO &MMIMacho =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
+
// Output stubs for dynamically-linked functions.
MachineModuleInfoMachO::SymbolListTy Stubs;
Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
+ const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__jump_table",
MCSectionMachO::S_SYMBOL_STUBS |
MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE |
@@ -514,7 +508,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
const char HltInsts[] = { -12, -12, -12, -12, -12 };
OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
}
-
+
Stubs.clear();
OutStreamer.AddBlankLine();
}
@@ -522,7 +516,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// Output stubs for external and common global variables.
Stubs = MMIMacho.GetGVStubList();
if (!Stubs.empty()) {
- const MCSection *TheSection =
+ const MCSection *TheSection =
OutContext.getMachOSection("__IMPORT", "__pointers",
MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
SectionKind::getMetadata());
@@ -580,7 +574,14 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- if (Subtarget->isTargetCOFF()) {
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
+ MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ }
+
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) {
X86COFFMachineModuleInfo &COFFMMI =
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
@@ -661,12 +662,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
-MachineLocation
+MachineLocation
X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
MachineLocation Location;
assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
-
+
if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
else {
@@ -690,9 +691,9 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
- O << '[';
+ O << '[';
if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
+ printOperand(MI, 0, O);
else
O << "undef";
O << '+'; printOperand(MI, 3, O);
@@ -718,10 +719,10 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
}
// Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() {
+extern "C" void LLVMInitializeX86AsmPrinter() {
RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
-
+
TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
}
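The new block that emits _fltused covers a quirk of MSVC-style targets: the Microsoft C runtime conventionally expects a reference to that symbol before it wires up floating-point support, so the printer now declares it globally whenever the module calls an external vararg function with floating-point arguments. A hypothetical translation unit that would take this path on a Windows (non-cygming) target:

// Hypothetical example of code that triggers the _fltused emission above:
// an external vararg callee receiving a floating-point argument.
#include <cstdio>

void report(double Mean) {
  std::printf("mean = %f\n", Mean);   // vararg call with an FP argument
}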
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index e61be66..3a50435 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -75,8 +75,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O);
- void PrintPICBaseSymbol(raw_ostream &O) const;
-
bool runOnMachineFunction(MachineFunction &F);
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index e3409ef..a44fb69 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
+ CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
// Long double types are always returned in ST0 (even with SSE).
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
- CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfInReg<CCIfSubtarget<"hasXMMInt()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
- CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
- CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[
// returned in RAX. This disagrees with ABI documentation but is bug
// compatible with gcc.
CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
- CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
// X86-Win64 C return-value convention.
def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
+ CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
// And FP in XMM0 only.
CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -161,14 +161,14 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32],
+ CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 256-bit vector arguments are passed in YMM registers.
@@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<32, 32>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
]>;
// Calling convention used on Win64
@@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
// The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64],
- CCBitConvertToType<i64>>,
+ CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f80], CCAssignToStack<0, 0>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
]>;
def CC_X86_64_GHC : CallingConv<[
@@ -246,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
+ CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
@@ -264,12 +263,12 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
// registers if the call is not a vararg call.
- CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
+ CCIfNotVarArg<CCIfType<[x86mmx],
CCAssignToReg<[MM0, MM1, MM2]>>>,
// Integer/Float values get stored in stack slots that are 4 bytes in
@@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+ CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
def CC_X86_32_C : CallingConv<[
// Promote i8/i16 arguments to i32.
@@ -363,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasSSE2()",
+ CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned.
@@ -380,3 +379,35 @@ def CC_X86_32_GHC : CallingConv<[
// Pass in STG registers: Base, Sp, Hp, R1
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+
+//===----------------------------------------------------------------------===//
+// X86 Root Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// This is the root argument convention for the X86-32 backend.
+def CC_X86_32 : CallingConv<[
+ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+
+ // Otherwise, drop to normal X86-32 CC
+ CCDelegateTo<CC_X86_32_C>
+]>;
+
+// This is the root argument convention for the X86-64 backend.
+def CC_X86_64 : CallingConv<[
+ CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
+// This is the argument convention used for the entire X86 backend.
+def CC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
+ CCDelegateTo<CC_X86_32>
+]>;
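The new root conventions encode a simple dispatch order: 64-bit subtargets try GHC, then Win64, then fall through to the common 64-bit C convention, while 32-bit subtargets try fastcall, thiscall, fastcc and GHC before the common 32-bit C convention. A rough sketch of that order in plain C++ (illustrative only; the real dispatcher is generated by TableGen from these records):

// Rough sketch of the dispatch order encoded by CC_X86 above.
// Illustrative only; the real code is generated from the .td records.
enum class CC { C, Fast, GHC, X86_FastCall, X86_ThisCall };

const char *selectX86CC(bool Is64Bit, bool IsWin64, CC CallConv) {
  if (Is64Bit) {
    if (CallConv == CC::GHC) return "CC_X86_64_GHC";
    if (IsWin64)             return "CC_X86_Win64_C";
    return "CC_X86_64_C";
  }
  switch (CallConv) {
  case CC::X86_FastCall: return "CC_X86_32_FastCall";
  case CC::X86_ThisCall: return "CC_X86_32_ThisCall";
  case CC::Fast:         return "CC_X86_32_FastCC";
  case CC::GHC:          return "CC_X86_32_GHC";
  default:               return "CC_X86_32_C";
  }
}

This mirrors the hand-written per-convention helpers that X86FastISel drops later in this change in favor of the generated tables.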
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
index 824021c..60d9d4a 100644
--- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -68,8 +68,7 @@ namespace {
return "X86 Machine Code Emitter";
}
- void emitInstruction(const MachineInstr &MI,
- const TargetInstrDesc *Desc);
+ void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -131,7 +130,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
const TargetInstrDesc &Desc = I->getDesc();
emitInstruction(*I, &Desc);
@@ -598,9 +597,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
}
template<class CodeEmitter>
-void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
const TargetInstrDesc *Desc) {
DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break;
+ case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break;
+ case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break;
+ case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break;
+ case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break;
+ case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break;
+ case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break;
+ case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break;
+ case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break;
+ }
+
MCE.processDebugLoc(MI.getDebugLoc(), true);
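The new switch lowers the ADD..._DB pseudo instructions on the JIT code-emitter path. The _DB (disjoint bits) forms are adds whose operands are known to share no set bits, so emitting them as OR produces the same value. A small check of that identity:

// Why ADD..._DB can be emitted as OR: with no common bits set there are no
// carries, so a + b and a | b are the same value.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xFFFF0000, B = 0x00001234;   // disjoint bit patterns
  assert((A & B) == 0);
  assert(A + B == (A | B));                  // 0xFFFF1234 either way
  return 0;
}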
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
index f84995d..f1d7ede 100644
--- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,6 +14,7 @@
#include "X86ELFWriterInfo.h"
#include "X86Relocations.h"
#include "llvm/Function.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,8 +25,8 @@ using namespace llvm;
// Implementation of the X86ELFWriterInfo class
//===----------------------------------------------------------------------===//
-X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
- : TargetELFWriterInfo(TM) {
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_)
+ : TargetELFWriterInfo(is64Bit_, isLittleEndian_) {
EMachine = is64Bit ? EM_X86_64 : EM_386;
}
@@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
if (is64Bit) {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_X86_64_PC32;
+ return ELF::R_X86_64_PC32;
case X86::reloc_absolute_word:
- return R_X86_64_32;
+ return ELF::R_X86_64_32;
case X86::reloc_absolute_word_sext:
- return R_X86_64_32S;
+ return ELF::R_X86_64_32S;
case X86::reloc_absolute_dword:
- return R_X86_64_64;
+ return ELF::R_X86_64_64;
case X86::reloc_picrel_word:
default:
llvm_unreachable("unknown x86_64 machine relocation type");
@@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
} else {
switch(MachineRelTy) {
case X86::reloc_pcrel_word:
- return R_386_PC32;
+ return ELF::R_386_PC32;
case X86::reloc_absolute_word:
- return R_386_32;
+ return ELF::R_386_32;
case X86::reloc_absolute_word_sext:
case X86::reloc_absolute_dword:
case X86::reloc_picrel_word:
@@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32: return Modifier - 4;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_PC32: return Modifier - 4;
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return Modifier;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32: return Modifier - 4;
- case R_386_32: return Modifier;
+ case ELF::R_386_PC32: return Modifier - 4;
+ case ELF::R_386_32: return Modifier;
default:
llvm_unreachable("unknown x86 relocation type");
}
@@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
- case R_X86_64_32:
- case R_X86_64_32S:
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
return 32;
- case R_X86_64_64:
+ case ELF::R_X86_64_64:
return 64;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
- case R_386_32:
+ case ELF::R_386_PC32:
+ case ELF::R_386_32:
return 32;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
- case R_X86_64_PC32:
+ case ELF::R_X86_64_PC32:
return true;
- case R_X86_64_32:
- case R_X86_64_32S:
- case R_X86_64_64:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ case ELF::R_X86_64_64:
return false;
default:
llvm_unreachable("unknown x86_64 relocation type");
}
} else {
switch(RelTy) {
- case R_386_PC32:
+ case ELF::R_386_PC32:
return true;
- case R_386_32:
+ case ELF::R_386_32:
return false;
default:
llvm_unreachable("unknown x86 relocation type");
@@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32)
+ if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
else
assert("computeRelocation unknown for this relocation type");
diff --git a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
index 342e6e6..a45b5bb 100644
--- a/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,25 +20,8 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
- // ELF Relocation types for X86
- enum X86RelocationType {
- R_386_NONE = 0,
- R_386_32 = 1,
- R_386_PC32 = 2
- };
-
- // ELF Relocation types for X86_64
- enum X86_64RelocationType {
- R_X86_64_NONE = 0,
- R_X86_64_64 = 1,
- R_X86_64_PC32 = 2,
- R_X86_64_32 = 10,
- R_X86_64_32S = 11,
- R_X86_64_PC64 = 24
- };
-
public:
- X86ELFWriterInfo(TargetMachine &TM);
+ X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
virtual ~X86ELFWriterInfo();
/// getRelocationType - Returns the target specific ELF Relocation type.
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 0c70eec..9d42ac2 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -36,7 +36,7 @@
using namespace llvm;
namespace {
-
+
class X86FastISel : public FastISel {
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -46,7 +46,7 @@ class X86FastISel : public FastISel {
///
unsigned StackPtr;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -63,11 +63,18 @@ public:
virtual bool TargetSelectInstruction(const Instruction *I);
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// possible.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
#include "X86GenFastISel.inc"
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
-
+
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
bool X86FastEmitStore(EVT VT, const Value *Val,
@@ -77,12 +84,12 @@ private:
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
-
+
bool X86SelectAddress(const Value *V, X86AddressMode &AM);
bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
bool X86SelectLoad(const Instruction *I);
-
+
bool X86SelectStore(const Instruction *I);
bool X86SelectRet(const Instruction *I);
@@ -98,7 +105,7 @@ private:
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
-
+
bool X86SelectFPExt(const Instruction *I);
bool X86SelectFPTrunc(const Instruction *I);
@@ -107,9 +114,6 @@ private:
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
- CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
-
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
@@ -128,17 +132,18 @@ private:
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};
-
+
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
- VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
- if (VT == MVT::Other || !VT.isSimple())
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
+ VT = evt.getSimpleVT();
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
@@ -157,45 +162,6 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
#include "X86GenCallingConv.inc"
-/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
- bool isTaillCall) {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
-/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
- bool isTaillCall) {
- if (Subtarget->is64Bit()) {
- if (Subtarget->isTargetWin64())
- return RetCC_X86_Win64_C;
- else
- return RetCC_X86_64_C;
- }
-
- return RetCC_X86_32_C;
-}
-
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
@@ -284,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
break;
}
-
+
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM).addReg(Val);
return true;
@@ -295,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
-
+
// If this is a store of a simple constant, fold the constant into the store.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
unsigned Opc = 0;
@@ -312,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
Opc = X86::MOV64mi32;
break;
}
-
+
if (Opc) {
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc)), AM)
@@ -321,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
return true;
}
}
-
+
unsigned ValReg = getRegForValue(Val);
if (ValReg == 0)
- return false;
-
+ return false;
+
return X86FastEmitStore(VT, ValReg, AM);
}
@@ -337,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
+
if (RR != 0) {
ResultReg = RR;
return true;
@@ -354,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Don't walk into other basic blocks; it's possible we haven't
// visited them yet, so the instructions may not yet be assigned
// virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
-
- Opcode = I->getOpcode();
- U = I;
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -472,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.Disp = (uint32_t)Disp;
if (X86SelectAddress(U->getOperand(0), AM))
return true;
-
+
// If we couldn't merge the sub value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
@@ -501,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// Allow the subtarget to classify the global.
unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -510,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// FIXME: How do we know Base.Reg is free??
AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
}
-
+
// Unless the ABI requires an extra load, return a direct reference to
// the global.
if (!isGlobalStubReference(GVFlags)) {
@@ -523,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
AM.GVOpFlags = GVFlags;
return true;
}
-
+
// Ok, we need to do a load from a stub. If we've already loaded from this
// stub, reuse the loaded pointer, otherwise emit the load now.
DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
@@ -545,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
-
+
if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
}
-
+
LoadReg = createResultReg(RC);
MachineInstrBuilder LoadMI =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
@@ -564,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
-
+
// Now construct the final address. Note that the Disp, Scale,
// and Index values may already be set here.
AM.Base.Reg = LoadReg;
@@ -638,7 +604,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
// Okay, we've committed to selecting this global. Set up the basic address.
AM.GV = GV;
-
+
// No ABI requires an extra load for anything other than DLLImport, which
// we rejected above. Return a direct reference to the global.
if (Subtarget->isPICStyleRIPRel()) {
@@ -651,7 +617,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
} else if (Subtarget->isPICStyleGOT()) {
AM.GVOpFlags = X86II::MO_GOTOFF;
}
-
+
return true;
}
@@ -674,7 +640,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -724,7 +690,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
unsigned Reg = getRegForValue(RV);
@@ -736,7 +702,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
CCValAssign &VA = ValLocs[0];
-
+
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
@@ -745,7 +711,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
// TODO: For now, don't try to handle cases where getLocInfo()
// says Full but the types don't match.
- if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
return false;
// The calling-convention tables for x87 returns don't tell
@@ -775,7 +741,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
@@ -826,11 +792,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
EVT VT) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
-
+
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Op1))
Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
-
+
// We have two options: compare with register or immediate. If the RHS of
// the compare is an immediate that we can fold into this compare, use
// CMPri, otherwise use CMPrr.
@@ -842,23 +808,23 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
return true;
}
}
-
+
unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
-
+
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
.addReg(Op0Reg)
.addReg(Op1Reg);
-
+
return true;
}
bool X86FastISel::X86SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
@@ -869,13 +835,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_OEQ: {
if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
return false;
-
+
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::SETNPr), NPReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -908,7 +874,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
@@ -930,7 +896,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
// Emit a compare of Op0/Op1.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -995,7 +961,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-
+
case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
@@ -1009,7 +975,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
default:
return false;
}
-
+
const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
if (SwapArgs)
std::swap(Op0, Op1);
@@ -1017,7 +983,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
.addMBB(TrueMBB);
@@ -1070,8 +1036,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.hasUnmodeledSideEffects() ||
- TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+ if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) ||
+ MI.hasUnmodeledSideEffects())
break;
}
@@ -1147,22 +1113,22 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
return false;
}
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
-
+
// Fold immediate in shl(x,3).
if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
}
-
+
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1183,23 +1149,26 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
-
+
+ // We only use cmov here, if we don't have a cmov instruction bail.
+ if (!Subtarget->hasCMov()) return false;
+
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- if (VT.getSimpleVT() == MVT::i16) {
+ if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
- } else if (VT.getSimpleVT() == MVT::i32) {
+ } else if (VT == MVT::i32) {
Opc = X86::CMOVE32rr;
RC = &X86::GR32RegClass;
- } else if (VT.getSimpleVT() == MVT::i64) {
+ } else if (VT == MVT::i64) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
- return false;
+ return false;
}
unsigned Op0Reg = getRegForValue(I->getOperand(0));
@@ -1264,7 +1233,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
// This code only handles truncation to byte right now.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
// All other cases should be handled by the tblgen generated code.
@@ -1335,21 +1304,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
+
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
+
return true;
}
case Intrinsic::objectsize: {
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
-
+
assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
- EVT VT;
+
+ MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
-
+
unsigned OpC = 0;
if (VT == MVT::i32)
OpC = X86::MOV32ri;
@@ -1357,7 +1326,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
OpC = X86::MOV64ri;
else
return false;
-
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
addImm(CI->isZero() ? -1ULL : 0);
@@ -1392,7 +1361,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
- EVT VT;
+ MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
@@ -1429,7 +1398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
ResultReg = DestReg1+1;
else
ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
-
+
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
@@ -1476,7 +1445,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
- EVT RetVT;
+ MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT, true))
@@ -1506,7 +1475,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Deal with call operands first.
SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
- SmallVector<EVT, 8> ArgVTs;
+ SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgVals.reserve(CS.arg_size());
@@ -1532,7 +1501,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
return false;
const Type *ArgTy = (*i)->getType();
- EVT ArgVT;
+ MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
@@ -1547,13 +1516,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
-
+
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
- CCInfo.AllocateStack(32, 8);
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
}
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1570,7 +1539,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CCValAssign &VA = ArgLocs[i];
unsigned Arg = Args[VA.getValNo()];
EVT ArgVT = ArgVTs[VA.getValNo()];
-
+
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1578,20 +1547,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
case CCValAssign::SExt: {
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::ZExt: {
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
- Emitted = true;
+ assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::AExt: {
+ // We don't handle MMX parameters yet.
+ if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
+ return false;
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
@@ -1600,21 +1570,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (!Emitted)
Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+
+ assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
ArgVT = VA.getLocVT();
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
- ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
+ ISD::BITCAST, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
break;
}
}
-
+
if (VA.isRegLoc()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg()).addReg(Arg);
@@ -1625,7 +1595,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
AM.Base.Reg = StackPtr;
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
+
// If this is a really simple value, emit this with the Value* version of
// X86FastEmitStore. If it isn't simple, we don't want to do this, as it
// can cause us to reevaluate the argument.
@@ -1637,13 +1607,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
// ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
+ // GOT pointer.
if (Subtarget->isPICStyleGOT()) {
unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
X86::EBX).addReg(Base);
}
-
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1657,7 +1627,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
-
+
} else {
// Direct call.
assert(GV && "Not a direct call");
@@ -1668,10 +1638,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
-
+
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
-
+
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols must go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
@@ -1688,8 +1658,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
-
-
+
+
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addGlobalAddress(GV, 0, OpFlags);
}
@@ -1709,7 +1679,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Now handle call return value (if any).
SmallVector<unsigned, 4> UsedRegs;
- if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+ if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
@@ -1718,7 +1688,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
-
+
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1756,7 +1726,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (AndToI1) {
// Mask out all but lowest bit for some call which produces an i1.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
ResultReg = AndResult;
}
@@ -1823,14 +1793,14 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
}
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
-
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
@@ -1871,7 +1841,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// No f80 support yet.
return false;
}
-
+
// Materialize addresses with LEA instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
@@ -1887,14 +1857,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
}
return 0;
}
-
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
// Alignment of vector types. FIXME!
Align = TD.getTypeAllocSize(C->getType());
}
-
+
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
unsigned char OpFlag = 0;
@@ -1941,6 +1911,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true on
+/// success.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
namespace llvm {
llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);
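The recurring change in this file is that isTypeLegal now hands back a simple MVT rather than an EVT: the extended type is only queried long enough to check isSimple(), and callers such as X86SelectStore, X86SelectLoad, X86SelectCmp and the call lowering carry an MVT from then on. A minimal stand-alone sketch of that pattern, using hypothetical stand-ins for the type classes and for TLI.getValueType rather than the real LLVM API:

// Hypothetical stand-ins; the real code uses llvm::EVT and llvm::MVT.
enum class SimpleVT { i8, i16, i32, i64, Other };

struct ExtVT {                        // models EVT: may or may not be simple
  SimpleVT Simple;
  bool isSimple() const { return Simple != SimpleVT::Other; }
  SimpleVT getSimpleVT() const { return Simple; }
};

// Assumed lookup, loosely akin to TLI.getValueType on an IR type.
ExtVT getValueType(unsigned BitWidth) {
  switch (BitWidth) {
  case 8:  return {SimpleVT::i8};
  case 16: return {SimpleVT::i16};
  case 32: return {SimpleVT::i32};
  case 64: return {SimpleVT::i64};
  default: return {SimpleVT::Other};  // e.g. i1 or an odd width
  }
}

// Mirrors the reworked isTypeLegal: bail on non-simple types, otherwise hand
// the simple machine type back to the caller.
bool isTypeLegal(unsigned BitWidth, SimpleVT &VT) {
  ExtVT Evt = getValueType(BitWidth);
  if (!Evt.isSimple())
    return false;                     // unhandled type: halt "fast" selection
  VT = Evt.getSimpleVT();
  return true;
}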
diff --git a/contrib/llvm/lib/Target/X86/X86FixupKinds.h b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
index 96e0aae..17d242a 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupKinds.h
+++ b/contrib/llvm/lib/Target/X86/X86FixupKinds.h
@@ -15,11 +15,17 @@
namespace llvm {
namespace X86 {
enum Fixups {
- reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
- reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
- reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw
- reloc_riprel_4byte, // 32-bit rip-relative
- reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq
+ reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
+ reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq
+ reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
+ // this will be sign extended at
+ // runtime.
+ reloc_global_offset_table, // 32-bit, relative to the start
+ // of the instruction. Used only
+ // for _GLOBAL_OFFSET_TABLE_.
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
}
}
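The reworked enum leans on MC's convention that target-specific fixups are numbered starting at FirstTargetFixupKind and are closed off by LastTargetFixupKind/NumTargetFixupKinds markers, so generic code can size per-target fixup tables. A self-contained sketch of that numbering scheme (the value 128 for FirstTargetFixupKind is only an assumption for the example):

#include <cassert>

// Stand-in for the generic kinds normally supplied by MCFixup.h.
enum { FirstTargetFixupKind = 128 };

enum Fixups {
  reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative
  reloc_riprel_4byte_movq_load,              // 32-bit rip-relative in movq
  reloc_signed_4byte,                        // 32-bit signed
  reloc_global_offset_table,                 // for _GLOBAL_OFFSET_TABLE_
  // Marker
  LastTargetFixupKind,
  NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};

int main() {
  // Four target fixups are declared above, so the derived table size is 4.
  assert(NumTargetFixupKinds == 4);
  return 0;
}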
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e6ebf66..3aaa693 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,6 +52,7 @@ namespace {
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
// This is really only to keep valgrind quiet.
// The logic in isLive() is too much for it.
memset(Stack, 0, sizeof(Stack));
@@ -59,6 +61,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<EdgeBundles>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -94,7 +97,7 @@ namespace {
// FixStack[i] == getStackEntry(i) for all i < FixCount.
unsigned char FixStack[8];
- LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+ LiveBundle() : Mask(0), FixCount(0) {}
// Have the live registers been assigned a stack order yet?
bool isFixed() const { return !Mask || FixCount; }
@@ -104,10 +107,8 @@ namespace {
// with no live FP registers.
SmallVector<LiveBundle, 8> LiveBundles;
- // Map each MBB in the current function to an (ingoing, outgoing) index into
- // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
- // and are not actually stored in the map.
- DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+ // The edge bundle analysis provides indices into the LiveBundles vector.
+ EdgeBundles *Bundles;
// Return a bitmask of FP registers in block's live-in list.
unsigned calcLiveInMask(MachineBasicBlock *MBB) {
@@ -167,7 +168,8 @@ namespace {
/// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
- assert(STi < StackTop && "Access past stack top!");
+ if (STi >= StackTop)
+ report_fatal_error("Access past stack top!");
return Stack[StackTop-1-STi];
}
@@ -180,7 +182,8 @@ namespace {
// pushReg - Push the specified FP<n> register onto the stack.
void pushReg(unsigned Reg) {
assert(Reg < 8 && "Register number out of range!");
- assert(StackTop < 8 && "Stack overflow!");
+ if (StackTop >= 8)
+ report_fatal_error("Stack overflow!");
Stack[StackTop] = Reg;
RegMap[Reg] = StackTop++;
}
@@ -197,7 +200,8 @@ namespace {
std::swap(RegMap[RegNo], RegMap[RegOnTop]);
// Swap stack slot contents.
- assert(RegMap[RegOnTop] < StackTop);
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
// Emit an fxch to update the runtime processors version of the state.
@@ -281,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Early exit.
if (!FPIsUsed) return false;
+ Bundles = &getAnalysis<EdgeBundles>();
TII = MF.getTarget().getInstrInfo();
// Prepare cross-MBB liveness.
@@ -305,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (Processed.insert(BB))
Changed |= processBasicBlock(MF, *BB);
- BlockBundle.clear();
LiveBundles.clear();
return Changed;
@@ -318,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
/// registers may be implicitly defined, or not used by all successors.
void FPS::bundleCFG(MachineFunction &MF) {
assert(LiveBundles.empty() && "Stale data in LiveBundles");
- assert(BlockBundle.empty() && "Stale data in BlockBundle");
- SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+ LiveBundles.resize(Bundles->getNumBundles());
- // LiveBundle[0] is the empty live-in set.
- LiveBundles.resize(1);
-
- // First gather the actual live-in masks for all MBBs.
+ // Gather the actual live-in masks for all MBBs.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = I;
const unsigned Mask = calcLiveInMask(MBB);
if (!Mask)
continue;
- // Ingoing bundle index.
- unsigned &Idx = BlockBundle[MBB].first;
- // Already assigned an ingoing bundle?
- if (Idx)
- continue;
- // Allocate a new LiveBundle struct for this block's live-ins.
- const unsigned BundleIdx = Idx = LiveBundles.size();
- DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
- << MBB->getNumber());
- LiveBundles.push_back(Mask);
- LiveBundle &Bundle = LiveBundles.back();
-
- // Make sure all predecessors have the same live-out set.
- PropUp.insert(MBB);
-
- // Keep pushing liveness up and down the CFG until convergence.
- // Only critical edges cause iteration here, but when they do, multiple
- // blocks can be assigned to the same LiveBundle index.
- do {
- // Assign BundleIdx as liveout from predecessors in PropUp.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
- E = PropUp.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
- LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *PredMBB = *LinkI;
- // PredMBB's liveout bundle should be set to LIIdx.
- unsigned &Idx = BlockBundle[PredMBB].second;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
- // Propagate to siblings.
- if (PredMBB->succ_size() > 1)
- PropDown.insert(PredMBB);
- }
- }
- PropUp.clear();
-
- // Assign BundleIdx as livein to successors in PropDown.
- for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
- E = PropDown.end(); I != E; ++I) {
- MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
- LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
- MachineBasicBlock *SuccMBB = *LinkI;
- // LinkMBB's livein bundle should be set to BundleIdx.
- unsigned &Idx = BlockBundle[SuccMBB].first;
- if (Idx) {
- assert(Idx == BundleIdx && "Inconsistent CFG");
- continue;
- }
- Idx = BundleIdx;
- DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
- // Propagate to siblings.
- if (SuccMBB->pred_size() > 1)
- PropUp.insert(SuccMBB);
- // Also accumulate the bundle liveness mask from the liveins here.
- Bundle.Mask |= calcLiveInMask(SuccMBB);
- }
- }
- PropDown.clear();
- } while (!PropUp.empty());
- DEBUG({
- dbgs() << " live:";
- for (unsigned i = 0; i < 8; ++i)
- if (Bundle.Mask & (1<<i))
- dbgs() << " %FP" << i;
- dbgs() << '\n';
- });
+ // Update MBB ingoing bundle mask.
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
}
}
@@ -489,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
return Changed;
}
-/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// setupBlockStack - Use the live bundles to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
StackTop = 0;
- const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+ // Get the live-in bundle for MBB.
+ const LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(MBB->getNumber(), false)];
if (!Bundle.Mask) {
DEBUG(dbgs() << "Block has no FP live-ins.\n");
@@ -532,7 +464,8 @@ void FPS::finishBlockStack() {
DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
<< " derived from " << MBB->getName() << ".\n");
- unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ // Get MBB's live-out bundle.
+ unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true);
LiveBundle &Bundle = LiveBundles[BundleIdx];
// We may need to kill and define some registers to match successors.
@@ -572,7 +505,8 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
+ friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
+ const TableEntry &TE) {
return V < TE.from;
}
};
@@ -824,7 +758,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
MachineInstr* MI = I;
DebugLoc dl = MI->getDebugLoc();
ASSERT_SORTED(PopTable);
- assert(StackTop > 0 && "Cannot pop empty stack!");
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
RegMap[Stack[--StackTop]] = ~0; // Update state
// Check to see if there is a popping version of this instruction...
@@ -1016,7 +951,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_FP32m ||
MI->getOpcode() == X86::ISTT_FP64m ||
MI->getOpcode() == X86::ST_FP80m) {
- assert(StackTop > 0 && "Stack empty??");
+ if (StackTop == 0)
+ report_fatal_error("Stack empty??");
--StackTop;
} else if (KillsSrc) { // Last use of operand?
popStackAfter(I);
@@ -1047,7 +983,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
// If this is the last use of the source register, just make sure it's on
// the top of the stack.
moveToTop(Reg, I);
- assert(StackTop > 0 && "Stack cannot be empty!");
+ if (StackTop == 0)
+ report_fatal_error("Stack cannot be empty!");
--StackTop;
pushReg(getFPReg(MI->getOperand(0)));
} else {
@@ -1300,7 +1237,6 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
///
void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SpecialFP instruction!");
case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
@@ -1341,7 +1277,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
std::swap(RegMap[RegNo], RegMap[RegOnTop]);
// Swap stack slot contents.
- assert(RegMap[RegOnTop] < StackTop);
+ if (RegMap[RegOnTop] >= StackTop)
+ report_fatal_error("Access past stack top!");
std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
break;
}
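The FP stackifier now keys its LiveBundle table off the separate EdgeBundles analysis instead of hand-propagating per-block (ingoing, outgoing) indices: a bundle groups the CFG edges that must agree on FP stack contents, and each block simply asks for its ingoing bundle with getBundle(N, false) or its outgoing bundle with getBundle(N, true). A stand-alone sketch of how live-in masks get accumulated per bundle, with a toy mapping in place of the real analysis:

#include <algorithm>
#include <cstdint>
#include <vector>

// Toy stand-in for EdgeBundles: maps (block number, isOut) to a bundle index.
struct ToyBundles {
  std::vector<unsigned> In, Out;          // per-block ingoing/outgoing bundles
  unsigned getBundle(unsigned Block, bool IsOut) const {
    return IsOut ? Out[Block] : In[Block];
  }
  unsigned getNumBundles() const {
    unsigned N = 0;
    for (unsigned B : In)  N = std::max(N, B + 1);
    for (unsigned B : Out) N = std::max(N, B + 1);
    return N;
  }
};

// Mirrors the rewritten bundleCFG(): OR each block's FP live-in mask into the
// mask of the block's ingoing bundle.
std::vector<uint8_t> collectBundleMasks(const ToyBundles &Bundles,
                                        const std::vector<uint8_t> &LiveIn) {
  std::vector<uint8_t> Mask(Bundles.getNumBundles(), 0);
  for (unsigned MBB = 0; MBB != LiveIn.size(); ++MBB)
    Mask[Bundles.getBundle(MBB, /*IsOut=*/false)] |= LiveIn[MBB];
  return Mask;
}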
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
new file mode 100644
index 0000000..0a3f931
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -0,0 +1,994 @@
+//===-- X86FrameLowering.cpp - X86 Frame Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86FrameLowering.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallSet.h"
+
+using namespace llvm;
+
+// FIXME: completely move here.
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worry about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
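// A minimal stand-alone sketch of the scan above: gather every register the
// return instruction reads, then hand back the first caller-saved candidate
// not in that set. Register overlaps (e.g. EAX/AX/AL) and the MachineOperand
// details are deliberately simplified away here.
#include <set>
#include <vector>

unsigned pickDeadCallerSavedReg(const std::vector<unsigned> &RetUses,
                                const std::vector<unsigned> &CallerSaved) {
  std::set<unsigned> Uses(RetUses.begin(), RetUses.end());
  for (unsigned Reg : CallerSaved)
    if (!Uses.count(Reg))
      return Reg;              // safe to clobber before the return
  return 0;                    // 0 means no dead caller-saved register found
}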
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ? 8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
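// A self-contained sketch of the chunking performed above: a large adjustment
// is split into pieces that each fit a 32-bit immediate. The push/pop special
// case and the actual ADD/SUB emission are omitted; only the arithmetic is
// modelled.
#include <cstdint>
#include <vector>

std::vector<uint64_t> chunkStackAdjustment(uint64_t Offset) {
  const uint64_t Chunk = (1ULL << 31) - 1;     // largest 32-bit signed imm
  std::vector<uint64_t> Pieces;
  while (Offset) {
    uint64_t ThisVal = Offset > Chunk ? Chunk : Offset;
    Pieces.push_back(ThisVal);
    Offset -= ThisVal;
  }
  return Pieces;
}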
+
+/// mergeSPUpdatesUp - If the instruction before the iterator is an ADD/SUB of
+/// the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - If the instruction after the iterator is an ADD/SUB of
+/// the stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Check the instruction before/after the passed instruction.
+/// If it is an ADD/SUB of the stack pointer, it is deleted and the stack
+/// adjustment is returned as a positive value for ADD and a negative one for
+/// SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
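// The merge helpers above share one peephole: a neighbouring ADD/SUB of the
// stack pointer is folded into the adjustment being built and then erased.
// A toy model of the sign convention used by mergeSPUpdates, with a simplified
// instruction record standing in for MachineInstr:
#include <cstdint>

enum class SPOp { Add, Sub, Other };
struct SimpleInstr { SPOp Kind; int64_t Imm; bool TargetsSP; };

// Positive result for an ADD of the stack pointer, negative for a SUB, and 0
// when the neighbour is not a stack-pointer adjustment at all.
int64_t neighbourSPAdjustment(const SimpleInstr &I) {
  if (!I.TargetsSP)
    return 0;
  if (I.Kind == SPOp::Add) return I.Imm;
  if (I.Kind == SPOp::Sub) return -I.Imm;
  return 0;
}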
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate amount of bytes used for return address storing.
+ int stackGrowth = -TD->getPointerSize();
+
+  // FIXME: This is a dirty hack. The code itself is a mess right now.
+  // It should be rewritten from scratch and generalized at some point.
+
+  // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables. Also emit labels used by the exception handler to
+/// generate the exception handling frames.
+void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !Fn->doesNotThrow() || UnwindTablesMandatory;
+ uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool HasFP = hasFP(MF);
+ bool Is64Bit = STI.is64Bit();
+ bool IsWin64 = STI.isTargetWin64();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ DebugLoc DL;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
+
+ // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
+ // function, and use up to 128 bytes of stack space, don't have a frame
+ // pointer, calls, or dynamic alloca then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
+ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
+ !RegInfo->needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64) { // Win64 has no Red Zone
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (HasFP) MinSize += SlotSize;
+ StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail call optimized functions where the callee argument stack
+ // size is bigger than the callers.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(-TailCallReturnAddrDelta);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
+ // Mapping for machine moves:
+ //
+ // DST: VirtualFP AND
+ // SRC: VirtualFP => DW_CFA_def_cfa_offset
+ // ELSE => DW_CFA_def_cfa
+ //
+ // SRC: VirtualFP AND
+ // DST: Register => DW_CFA_def_cfa_register
+ //
+ // ELSE
+ // OFFSET < 0 => DW_CFA_offset_extended_sf
+ // REG < 64 => DW_CFA_offset + Reg
+ // ELSE => DW_CFA_offset_extended
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+ uint64_t NumBytes = 0;
+ int stackGrowth = -TD->getPointerSize();
+
+ if (HasFP) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP/RBP into the appropriate stack slot.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill);
+
+ if (needsFrameMoves) {
+ // Mark the place where EBP/RBP was saved.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
+
+ // Define the current CFA to use the EBP/RBP register.
+ MachineLocation FPDst(FramePtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ }
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
+ if (RegInfo->needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ // Skip the callee-saved push instructions.
+ bool PushedRegs = false;
+ int StackOffset = 2 * stackGrowth;
+
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r)) {
+ PushedRegs = true;
+ ++MBBI;
+
+ if (!HasFP && needsFrameMoves) {
+ // Mark callee-saved push instruction.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+  // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 &&
+ (STI.isTargetCygMing() || STI.isTargetWin32()) &&
+ !STI.isTargetEnvMacho()) {
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ const char *StackProbeSymbol =
+ STI.isTargetWindows() ? "_chkstk" : "_alloca";
+ if (Is64Bit && STI.isTargetCygMing())
+ StackProbeSymbol = "__chkstk";
+ unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes - 4);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes >= 4096 &&
+ STI.isTargetWin64() &&
+ !STI.isTargetEnvMacho()) {
+ // Sanity check that EAX is not livein for this function. It should
+ // not be, so throw an assert.
+ assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
+ .addExternalSymbol("__chkstk")
+ .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+ } else if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ // Mark end of stack pointer adjustment.
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ if (!HasFP && NumBytes) {
+ // Define the current CFA rule to use the provided offset.
+ if (StackSize) {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize + stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ }
+ }
+
+ // Emit DWARF info specifying the offsets of the callee-saved registers.
+ if (PushedRegs)
+ emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
+ }
+}
+
+void X86FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86InstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no instructions");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+ bool Is64Bit = STI.is64Bit();
+ unsigned StackAlign = getStackAlignment();
+ unsigned SlotSize = RegInfo->getSlotSize();
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned StackPtr = RegInfo->getStackRegister();
+
+ switch (RetOpcode) {
+ default:
+ llvm_unreachable("Can only insert epilog into returning blocks");
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ break; // These are ok
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment.
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (RegInfo->needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+  // If dynamic alloca is used, then reset esp to point to the last callee-saved
+  // slot before popping them off. The same applies when the stack was
+  // realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+    // We cannot use LEA here because the stack pointer was realigned. We need
+    // to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+    // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+      // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ? X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+    // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+  // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+
+ // Skip the RETADDR move area
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
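For readability, the frame-pointer-relative path of getFrameIndexOffset can be restated as plain arithmetic; this is an illustrative sketch with a made-up helper name, not LLVM API. With assumed x86-64 values (OffsetOfLocalArea = -8, SlotSize = 8, no tail-call delta), an object at MachineFrameInfo offset -24 comes out as -8, i.e. it is addressed at [rbp - 8].

// Pure-arithmetic restatement of the frame-pointer-relative case above.
static int FPRelativeOffset(int ObjectOffset, int OffsetOfLocalArea,
                            int SlotSize, int TailCallReturnAddrDelta) {
  int Offset = ObjectOffset - OffsetOfLocalArea; // distance from the local area
  Offset += SlotSize;                            // skip the saved EBP/RBP
  if (TailCallReturnAddrDelta < 0)
    Offset -= TailCallReturnAddrDelta;           // skip the RETADDR move area
  return Offset;                                 // offset from the frame register
}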
+
+bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+
+ bool isWin64 = STI.isTargetWin64();
+ unsigned SlotSize = STI.is64Bit() ? 8 : 4;
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ unsigned CalleeFrameSize = 0;
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ if (Reg == FPReg)
+      // X86FrameLowering::emitPrologue will handle spilling of the frame register.
+ continue;
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+ return true;
+}
+
+bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned FPReg = TRI->getFrameRegister(MF);
+ bool isWin64 = STI.isTargetWin64();
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (Reg == FPReg)
+      // X86FrameLowering::emitEpilogue will handle restoring of the frame register.
+ continue;
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+ return true;
+}
+
+void
+X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ unsigned SlotSize = RegInfo->getSlotSize();
+
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
+ }
+
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ -(int)SlotSize +
+ TFI.getOffsetOfLocalArea() +
+ TailCallReturnAddrDelta,
+ true);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
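A worked example of the fixed-object offsets above, with assumed x86-64 values (SlotSize = 8, getOffsetOfLocalArea() = -8, TailCallReturnAddrDelta = -8); the snippet only reproduces the arithmetic of the two CreateFixedObject calls:

#include <cstdio>

int main() {
  const int SlotSize = 8, LocalArea = -8, Delta = -8;
  // RETURNADDR area: size -Delta at offset -SlotSize + Delta.
  std::printf("RETADDR area: %d bytes at offset %d\n", -Delta, -SlotSize + Delta);
  // Saved frame pointer: size SlotSize at offset -SlotSize + LocalArea + Delta.
  std::printf("saved RBP   : %d bytes at offset %d\n", SlotSize,
              -SlotSize + LocalArea + Delta);
  return 0;  // prints offsets -16 and -24 for these assumed values
}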
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
new file mode 100644
index 0000000..d71108c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -0,0 +1,65 @@
+//===-- X86FrameLowering.h - Define frame lowering for X86 ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86-specific bits of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_FRAMELOWERING_H
+#define X86_FRAMELOWERING_H
+
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class MCSymbol;
+ class X86TargetMachine;
+
+class X86FrameLowering : public TargetFrameLowering {
+ const X86TargetMachine &TM;
+ const X86Subtarget &STI;
+public:
+ explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
+ : TargetFrameLowering(StackGrowsDown,
+ sti.getStackAlignment(),
+ (sti.is64Bit() ? -8 : -4)),
+ TM(tm), STI(sti) {
+ }
+
+ void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
+ unsigned FramePtr) const;
+
+  /// emitPrologue/emitEpilogue - These methods insert prologue and epilogue
+  /// code into the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c523441..9b0ec6e 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -190,20 +190,19 @@ namespace {
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
- bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
- bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+ bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
@@ -264,12 +263,6 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
- /// getI16Imm - Return a target constant with the specified value, of type
- /// i16.
- inline SDValue getI16Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i16);
- }
-
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -511,10 +504,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
- MemTmp, NULL, 0, MemVT,
+ MemTmp, MachinePointerInfo(), MemVT,
false, false, 0);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
- NULL, 0, MemVT, false, false, 0);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havoc on the dag because
@@ -536,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing()) {
+ unsigned CallOp =
+ Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
- TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+ TII->get(CallOp)).addExternalSymbol("__main");
+ }
}
void X86DAGToDAGISel::EmitFunctionEntryCode() {
@@ -549,29 +546,27 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
}
-bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
- X86ISelAddressMode &AM) {
- assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
- SDValue Segment = N.getOperand(0);
-
- if (AM.Segment.getNode() == 0) {
- AM.Segment = Segment;
- return false;
- }
-
- return true;
-}
-
-bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+ SDValue Address = N->getOperand(1);
+
+ // load gs:0 -> GS segment register.
+ // load fs:0 -> FS segment register.
+ //
// This optimization is valid because the GNU TLS model defines that
// gs:0 (or fs:0 on X86-64) contains its own address.
// For more information see http://people.redhat.com/drepper/tls.pdf
-
- SDValue Address = N.getOperand(1);
- if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
- !MatchSegmentBaseAddress (Address, AM))
- return false;
-
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ Subtarget->isTargetELF())
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case 256:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case 257:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
+ }
+
return true;
}
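As an aside, the address spaces tested here follow the x86 convention LLVM uses for segment-relative memory: 256 is GS and 257 is FS. An illustrative sketch of how such loads can be produced from C++, assuming Clang's address_space attribute (the macro and function names are made up):

#define GS_RELATIVE __attribute__((address_space(256)))

// A load through a GS_RELATIVE pointer is emitted as a %gs-relative access.
// A load of offset 0 with no segment already chosen is the pattern
// MatchLoadInAddress folds into a plain segment-register operand, since
// gs:0 (fs:0 on x86-64) holds its own base address under the GNU TLS model.
static int ReadGSDword(unsigned long Offset) {
  int GS_RELATIVE *P = reinterpret_cast<int GS_RELATIVE *>(Offset);
  return *P;
}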
@@ -690,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-/// isLogicallyAddWithConstant - Return true if this node is semantically an
-/// add of a value with a constantint.
-static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
- // Check for (add x, Cst)
- if (V->getOpcode() == ISD::ADD)
- return isa<ConstantSDNode>(V->getOperand(1));
-
- // Check for (or x, Cst), where Cst & x == 0.
- if (V->getOpcode() != ISD::OR ||
- !isa<ConstantSDNode>(V->getOperand(1)))
- return false;
-
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));
-
- // Check to see if the LHS & C is zero.
- return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
-}
-
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
@@ -756,11 +732,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
}
- case X86ISD::SegmentBaseAddress:
- if (!MatchSegmentBaseAddress(N, AM))
- return false;
- break;
-
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
if (!MatchWrapper(N, AM))
@@ -768,7 +739,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::LOAD:
- if (!MatchLoad(N, AM))
+ if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
@@ -799,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Okay, we know that we have a scale by now. However, if the scaled
// value is an add of something and a constant, we can fold the
// constant into the disp field here.
- if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(ShVal)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
@@ -943,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Add an artificial use to this node so that we can keep track of
// it if it gets CSE'd with a different node.
HandleSDNode Handle(N);
- SDValue LHS = Handle.getValue().getNode()->getOperand(0);
- SDValue RHS = Handle.getValue().getNode()->getOperand(1);
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
- !MatchAddressRecursively(RHS, AM, Depth+1))
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
-
+
// Try again after commuting the operands.
- if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
- !MatchAddressRecursively(LHS, AM, Depth+1))
+ if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
+ !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
return false;
AM = Backup;
- LHS = Handle.getValue().getNode()->getOperand(0);
- RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
@@ -968,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base_Reg = LHS;
- AM.IndexReg = RHS;
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
AM.Scale = 1;
return false;
}
+ N = Handle.getValue();
break;
}
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (isLogicallyAddWithConstant(N, CurDAG)) {
+ if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
uint64_t Offset = CN->getSExtValue();
@@ -1148,10 +1115,30 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
/// SelectAddr - returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched. It
+/// is always a load, store, atomic node, or null. It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
+
+ if (Parent &&
+      // These opcodes are all the nodes that have an "addr:$ptr" operand
+ // that are not a MemSDNode, and thus don't have proper addrspace info.
+ Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+ Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+ Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+ unsigned AddrSpace =
+ cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+ // AddrSpace 256 -> GS, 257 -> FS.
+ if (AddrSpace == 256)
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ if (AddrSpace == 257)
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ }
+
if (MatchAddress(N, AM))
return false;
@@ -1187,7 +1174,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
return true;
}
@@ -1205,7 +1192,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
PatternNodeWithChain = SDValue(LD, 0);
return true;
@@ -1216,7 +1203,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1278,7 +1265,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
}
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1311,7 +1298,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsLegalToFold(N, P, P, OptLevel))
return false;
- return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+ return SelectAddr(N.getNode(),
+ N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1329,7 +1317,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
@@ -1355,7 +1343,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
bool isInc = false, isDec = false, isSub = false, isCN = false;
@@ -1592,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
-
+ case X86ISD::UMUL: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ unsigned LoReg;
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
+ case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
+ case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
+ case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ SDValue Ops[] = {N1, InFlag};
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+ ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
+ return NULL;
+ }
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@@ -1642,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
+
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
- InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
+ InFlag = SDValue(CNode, 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1688,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
@@ -1773,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (isSigned && !signBitIsZero) {
// Sign extend the low part into the high part.
InFlag =
- SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode =
@@ -1787,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
InFlag =
- SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1971,7 +1985,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6db979..27024b4 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16,9 +16,9 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
-#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -56,39 +57,172 @@ using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+Disable256Bit("disable-256bit", cl::Hidden,
+ cl::desc("Disable use of 256-bit vectors"));
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128 bits from a vector > 128 bits. This
+/// sets things up to match an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference. Idx is an element index selecting the
+/// 128 bits we want; it need not be aligned to a 128-bit boundary. That
+/// makes lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ int Factor = VT.getSizeInBits() / 128;
+
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+ ElVT,
+ VT.getVectorNumElements() / Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+ if (isa<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR
+ // we can match to VEXTRACTF128.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+ VecIdx);
+
+ return Result;
+ }
+
+ return SDValue();
+}
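To see what the index normalization above does, take an assumed v8i32 source (256 bits, 32-bit elements, so ElemsPerChunk = 4): element index 5 normalizes to ((5 * 32) / 128) * 4 = 4, selecting the upper half, while index 3 normalizes to 0 and selects the lower half. A standalone restatement of that arithmetic (helper name is illustrative):

// Round an element index down to the first element of the 128-bit chunk that
// contains it (mirrors the computation above).
static unsigned NormalizeTo128BitChunk(unsigned ElemIdx, unsigned ElemBits) {
  unsigned ElemsPerChunk = 128 / ElemBits;
  return ((ElemIdx * ElemBits) / 128) * ElemsPerChunk;  // e.g. (5, 32) -> 4
}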
+
+/// Generate a DAG to put 128 bits into a vector > 128 bits. This
+/// sets things up to match an AVX VINSERTF128 instruction or a
+/// simple superregister reference. Idx is an element index selecting the
+/// 128 bits we want; it need not be aligned to a 128-bit boundary. That
+/// makes lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+ SDValue Vec,
+ SDValue Idx,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ if (isa<ConstantSDNode>(Idx)) {
+ EVT VT = Vec.getValueType();
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+ EVT ElVT = VT.getVectorElementType();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant 128 bits.
+ unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the 128-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+ Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+ VecIdx);
+ return Result;
+ }
+
+ return SDValue();
+}
+
+/// Given two vectors, concat them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+ DebugLoc dl = Lower.getDebugLoc();
+
+ assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(),
+ Lower.getValueType().getVectorElementType(),
+ Lower.getValueType().getVectorNumElements() * 2);
+
+ // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+ assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+ // Insert the upper subvector.
+ SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+ DAG.getConstant(
+ // This is half the length of the result
+ // vector. Start inserting the upper 128
+ // bits here.
+ Lower.getValueType().getVectorNumElements(),
+ MVT::i32),
+ DAG, dl);
+
+ // Insert the lower subvector.
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Vec;
+}
+
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
- if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
- if (is64Bit) return new X8664_MachoTargetObjectFile();
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ if (Subtarget->isTargetEnvMacho()) {
+ if (is64Bit)
+ return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){
- if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
+ }
+
+ if (Subtarget->isTargetELF()) {
+ if (is64Bit)
+ return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
+ }
+ if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
return new TargetLoweringObjectFileCOFF();
- }
llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf64 = Subtarget->hasXMMInt();
+ X86ScalarSSEf32 = Subtarget->hasXMM();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
TD = getTargetData();
// Set up the TargetLowering object.
+ static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
@@ -96,6 +230,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
@@ -213,16 +359,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
- setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
- // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
- if (Subtarget->hasMMX() && !DisableMMX)
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
- else
- setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory.
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
@@ -236,30 +379,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- setOperationAction(ISD::MULHS , MVT::i8 , Expand);
- setOperationAction(ISD::MULHU , MVT::i8 , Expand);
- setOperationAction(ISD::SDIV , MVT::i8 , Expand);
- setOperationAction(ISD::UDIV , MVT::i8 , Expand);
- setOperationAction(ISD::SREM , MVT::i8 , Expand);
- setOperationAction(ISD::UREM , MVT::i8 , Expand);
- setOperationAction(ISD::MULHS , MVT::i16 , Expand);
- setOperationAction(ISD::MULHU , MVT::i16 , Expand);
- setOperationAction(ISD::SDIV , MVT::i16 , Expand);
- setOperationAction(ISD::UDIV , MVT::i16 , Expand);
- setOperationAction(ISD::SREM , MVT::i16 , Expand);
- setOperationAction(ISD::UREM , MVT::i16 , Expand);
- setOperationAction(ISD::MULHS , MVT::i32 , Expand);
- setOperationAction(ISD::MULHU , MVT::i32 , Expand);
- setOperationAction(ISD::SDIV , MVT::i32 , Expand);
- setOperationAction(ISD::UDIV , MVT::i32 , Expand);
- setOperationAction(ISD::SREM , MVT::i32 , Expand);
- setOperationAction(ISD::UREM , MVT::i32 , Expand);
- setOperationAction(ISD::MULHS , MVT::i64 , Expand);
- setOperationAction(ISD::MULHU , MVT::i64 , Expand);
- setOperationAction(ISD::SDIV , MVT::i64 , Expand);
- setOperationAction(ISD::UDIV , MVT::i64 , Expand);
- setOperationAction(ISD::SREM , MVT::i64 , Expand);
- setOperationAction(ISD::UREM , MVT::i64 , Expand);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+    // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
+ setOperationAction(ISD::ADDC, VT, Custom);
+ setOperationAction(ISD::ADDE, VT, Custom);
+ setOperationAction(ISD::SUBC, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Custom);
+ }
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
@@ -276,21 +410,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
@@ -298,7 +438,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
// X86 wants to expand cmov itself.
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
- setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
@@ -341,12 +481,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
// We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
-
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -355,15 +495,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setShouldFoldAtomicFences(true);
// Expand certain atomics
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ for (unsigned i = 0, e = 4; i != e; ++i) {
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ }
if (!Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -415,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
@@ -512,13 +648,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
- bool ignored;
- APFloat TmpFlt(+0.0);
- TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
- &ignored);
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+
+ bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
&ignored);
@@ -564,8 +699,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
@@ -613,91 +749,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
- addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
-
- addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
-
- setOperationAction(ISD::ADD, MVT::v8i8, Legal);
- setOperationAction(ISD::ADD, MVT::v4i16, Legal);
- setOperationAction(ISD::ADD, MVT::v2i32, Legal);
- setOperationAction(ISD::ADD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::SUB, MVT::v8i8, Legal);
- setOperationAction(ISD::SUB, MVT::v4i16, Legal);
- setOperationAction(ISD::SUB, MVT::v2i32, Legal);
- setOperationAction(ISD::SUB, MVT::v1i64, Legal);
-
- setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
- setOperationAction(ISD::MUL, MVT::v4i16, Legal);
-
- setOperationAction(ISD::AND, MVT::v8i8, Promote);
- AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v4i16, Promote);
- AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v2i32, Promote);
- AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::AND, MVT::v1i64, Legal);
-
- setOperationAction(ISD::OR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::OR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::XOR, MVT::v8i8, Promote);
- AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v4i16, Promote);
- AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v2i32, Promote);
- AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::XOR, MVT::v1i64, Legal);
-
- setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
-
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
-
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
-
- setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
- setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
- setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
- setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
-
- if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
- setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
- }
- }
-
- if (!UseSoftFloat && Subtarget->hasSSE1()) {
+ if (!UseSoftFloat && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
+    // No operations on x86mmx are supported; everything uses intrinsics.
+ }
+
+ // MMX-sized vectors (other than x86mmx) are expected to be expanded
+ // into smaller operations.
+ setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
+ setOperationAction(ISD::AND, MVT::v8i8, Expand);
+ setOperationAction(ISD::AND, MVT::v4i16, Expand);
+ setOperationAction(ISD::AND, MVT::v2i32, Expand);
+ setOperationAction(ISD::AND, MVT::v1i64, Expand);
+ setOperationAction(ISD::OR, MVT::v8i8, Expand);
+ setOperationAction(ISD::OR, MVT::v4i16, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
+ setOperationAction(ISD::OR, MVT::v1i64, Expand);
+ setOperationAction(ISD::XOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -714,7 +803,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasSSE2()) {
+ if (!UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -795,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
-
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
@@ -818,10 +907,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
- if (!DisableMMX && Subtarget->hasMMX()) {
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- }
}
if (Subtarget->hasSSE41()) {
@@ -863,9 +948,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
}
- if (Subtarget->hasSSE42()) {
+ if (Subtarget->hasSSE42())
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
- }
if (!UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
@@ -878,27 +962,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
- //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
- //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
- //setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom);
-
- // Operations to consider commented out -v16i16 v32i8
- //setOperationAction(ISD::ADD, MVT::v16i16, Legal);
- setOperationAction(ISD::ADD, MVT::v8i32, Custom);
- setOperationAction(ISD::ADD, MVT::v4i64, Custom);
- //setOperationAction(ISD::SUB, MVT::v32i8, Legal);
- //setOperationAction(ISD::SUB, MVT::v16i16, Legal);
- setOperationAction(ISD::SUB, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v4i64, Custom);
- //setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
@@ -906,85 +977,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- setOperationAction(ISD::VSETCC, MVT::v4f64, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom);
- // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom);
- setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
-
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom);
- // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom);
- // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
-
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
-
- // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- // This includes 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
- EVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
+ // Custom lower build_vector, vector_shuffle, scalar_to_vector,
+    // insert_vector_elt, extract_subvector, and extract_vector_elt for
+ // 256-bit types.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
+ || (MVT(VT).getSizeInBits() < 256))
continue;
-
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- }
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ }
+ // Custom-lower insert_subvector and extract_subvector based on
+ // the result type.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-256-bit vectors
+ if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ continue;
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+ if (MVT(VT).getSizeInBits() == 128) {
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ }
+ else if (MVT(VT).getSizeInBits() == 256) {
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ }
}
-#endif
-#if 0
- // Not sure we want to do this since there are no 256-bit integer
- // operations in AVX
-
- // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
- // Including 256-bit vectors
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
- EVT VT = (MVT::SimpleValueType)i;
+ // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
+ // Don't promote loads because we need them for VPERM vector index versions.
- if (!VT.is256BitVector()) {
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT++) {
+ if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
+ || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
continue;
- }
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v4i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v4i64);
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, VT, Promote);
- AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
+ //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
}
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-#endif
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- // Add/Sub/Mul with overflow operations are custom lowered.
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i32, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -992,14 +1044,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ MVT VT = IntVTs[i];
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SMULO, VT, Custom);
+ setOperationAction(ISD::UMULO, VT, Custom);
}
+ // There are no 8-bit 3-address imul/mul instructions
+ setOperationAction(ISD::SMULO, MVT::i8, Expand);
+ setOperationAction(ISD::UMULO, MVT::i8, Expand);
+
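The loop above relies on the IntVTs array declared earlier in this file; a minimal standalone sketch of the resulting coverage, assuming (not shown in this hunk) that IntVTs lists i8, i16, i32, i64 in ascending width:

#include <vector>

// Sketch only, not part of the patch. Returns the integer widths whose
// overflow-aware ops (SADDO/UADDO/SSUBO/USUBO/SMULO/UMULO) end up Custom;
// 8-bit SMULO/UMULO are forced back to Expand right after the loop because
// there is no 8-bit 3-address multiply.
static std::vector<unsigned> customOverflowWidths(bool Is64Bit) {
  static const unsigned Widths[] = {8, 16, 32, 64};   // mirrors the assumed IntVTs
  std::vector<unsigned> Result;
  for (unsigned i = 0, e = 3 + (Is64Bit ? 1 : 0); i != e; ++i)
    Result.push_back(Widths[i]);
  return Result;                                       // {8,16,32} or {8,16,32,64}
}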
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
@@ -1016,6 +1075,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
@@ -1023,11 +1085,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
- // FIXME: These should be based on subtarget info. Plus, the values should
- // be smaller when we are in optimizing for size mode.
+ // On Darwin, -Os means optimize for size without hurting performance,
+  // so do not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
@@ -1078,7 +1143,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
unsigned Align = 4;
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1119,7 +1184,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasSSE2()) {
+ Subtarget->hasXMMInt()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
@@ -1139,21 +1204,11 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
-
+
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
-/// getPICBaseSymbol - Return the X86-32 PIC base.
-MCSymbol *
-X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
- MCContext &Ctx) const {
- const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
-}
-
-
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
@@ -1188,7 +1243,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
- return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx);
+ return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -1196,6 +1251,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+// FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
@@ -1207,8 +1263,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
RRC = (Subtarget->is64Bit()
? X86::GR64RegisterClass : X86::GR32RegisterClass);
break;
- case MVT::v8i8: case MVT::v4i16:
- case MVT::v2i32: case MVT::v1i64:
+ case MVT::x86mmx:
RRC = X86::VR64RegisterClass;
break;
case MVT::f32: case MVT::f64:
@@ -1222,10 +1277,13 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
return std::make_pair(RRC, Cost);
}
+// FIXME: Why is this routine here? Move to RegInfo!
unsigned
X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
@@ -1267,7 +1325,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
#include "X86GenCallingConv.inc"
-bool
+bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
@@ -1312,16 +1370,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
- // If this is x86-64, and we disabled SSE, we can't return FP values
- if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ // If this is x86-64, and we disabled SSE, we can't return FP values,
+ // or SSE or MMX vectors.
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+ VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+ (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1340,20 +1400,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
-
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
- ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@@ -1367,7 +1426,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
+ assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
@@ -1388,6 +1447,28 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg &&
+ Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
+
+ bool HasRet = false;
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != X86ISD::RET_FLAG)
+ return false;
+ HasRet = true;
+ }
+
+ return HasRet;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
Ops, 2), 1);
Val = Chain.getValue(0);
@@ -1456,7 +1537,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
MVT::i64, InFlag).getValue(1);
Val = Chain.getValue(0);
}
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
@@ -1499,30 +1580,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
return Ins[0].Flags.isSRet();
}
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
@@ -1531,10 +1588,11 @@ static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
- NULL, 0, NULL, 0);
+ MachinePointerInfo(), MachinePointerInfo());
}
/// IsTailCallConvention - Return true if the calling convention is one that
@@ -1583,7 +1641,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
}
}
@@ -1617,7 +1675,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
@@ -1644,12 +1708,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
- else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ else if (RegVT == MVT::x86mmx)
RC = X86::VR64RegisterClass;
else
llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1662,14 +1726,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
- ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- ArgValue, DAG.getConstant(0, MVT::i64));
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+ ArgValue);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
@@ -1680,8 +1743,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
- ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
- false, false, 0);
+ ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+ MachinePointerInfo(), false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1708,8 +1771,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall)) {
+ if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
@@ -1719,9 +1782,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegsWin64[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
- };
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
@@ -1729,40 +1789,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs, *XMMArgRegs;
+ const unsigned *GPR64ArgRegs;
+ unsigned NumXMMRegs = 0;
if (IsWin64) {
- TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ // The XMM registers which might contain var arg parameters are shadowed
+      // in their paired GPRs. So we only need to save the GPRs to their home
+ // slots.
+ TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
- XMMArgRegs = XMMArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
- XMMArgRegs = XMMArgRegs64Bit;
+
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
- TotalNumXMMRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
- FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(
- MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+ if (IsWin64) {
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ } else {
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+      // may be loaded by dereferencing the result of va_next.
+ FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+ FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+ FuncInfo->setRegSaveFrameIndex(
+ MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
false));
+ }
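For reference, a minimal sketch of the Win64 shadow-area layout the fixed object above points into; the offsets are relative to the callee's RSP at function entry (return address at [RSP]), which is background about the ABI rather than something spelled out in this hunk:

// Sketch only, not part of the patch. Win64 reserves caller-allocated home
// slots for the four GPR argument registers directly above the return
// address; the unallocated slots are what the vararg code above spills into.
static int win64HomeSlotFromEntrySP(unsigned ArgRegIndex /* 0=RCX,1=RDX,2=R8,3=R9 */) {
  return 8 + static_cast<int>(ArgRegIndex) * 8;  // RCX->+8, RDX->+16, R8->+24, R9->+32
}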
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
@@ -1773,13 +1845,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
+ X86::GR64RegisterClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(
- FuncInfo->getRegSaveFrameIndex()),
- Offset, false, false, 0);
+ MachinePointerInfo::getFixedStack(
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
@@ -1789,7 +1861,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
- unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
@@ -1799,8 +1871,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
- unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
+ unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+ X86::VR128RegisterClass, dl);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
@@ -1843,15 +1915,14 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
- const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
- unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+ unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal()) {
+ if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- }
+
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
@@ -1867,7 +1938,8 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+ false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
@@ -1886,7 +1958,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+ MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
return Chain;
}
@@ -1902,6 +1974,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
@@ -1927,7 +2000,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+ // Allocate shadow area for Win64
+ if (IsWin64) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1986,21 +2065,21 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CCValAssign::AExt:
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Arg = SpillSlot;
break;
@@ -2009,7 +2088,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- if (isVarArg && Subtarget->isTargetWin64()) {
+ if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
@@ -2075,7 +2154,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
+ if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2090,7 +2169,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
- assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ assert((Subtarget->hasXMM() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2143,7 +2222,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Store relative to framepointer.
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -2192,8 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2206,13 +2285,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
- // symbols should go through the PLT.
+ // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+ // external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ Subtarget->getDarwinVers() < 9) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2224,7 +2303,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
@@ -2250,7 +2329,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
- if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
+ if (Is64Bit && isVarArg && !IsWin64)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2337,7 +2416,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
@@ -2364,7 +2443,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
@@ -2510,14 +2589,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
- if (Subtarget->isTargetWin64())
- // Win64 ABI has additional complications.
- return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
@@ -2564,6 +2646,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
return true;
}
@@ -2592,6 +2679,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
@@ -2600,6 +2688,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
@@ -2625,6 +2714,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
+ case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
@@ -2648,6 +2738,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -2770,8 +2861,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
- if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
- !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+ !ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
@@ -2865,7 +2956,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+  if (VT == MVT::v4f32 || VT == MVT::v4i32)
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return (Mask[0] < 2 && Mask[1] < 2);
@@ -2933,15 +3024,15 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3) {
int i, e = VT.getVectorNumElements();
-
+
// Do not handle v2i64 / v2f64 shuffles with palignr.
if (e < 4 || !hasSSSE3)
return false;
-
+
for (i = 0; i != e; ++i)
if (Mask[i] >= 0)
break;
-
+
// All undef, not a palignr.
if (i == e)
return false;
@@ -2952,13 +3043,13 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool NeedsUnary = false;
int s = Mask[i] - i;
-
+
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
+ if (m < 0)
continue;
-
+
Unary = Unary && (m < (int)e);
NeedsUnary = NeedsUnary || (m < s);
@@ -3046,10 +3137,10 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
+
return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
isUndefOrEqual(N->getMaskElt(2), 2) &&
@@ -3320,6 +3411,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
return true;
}
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ return false;
+
+ // The index should be aligned on a 128-bit boundary.
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ unsigned VL = N->getValueType(0).getVectorNumElements();
+ unsigned VBits = N->getValueType(0).getSizeInBits();
+ unsigned ElSize = VBits / VL;
+ bool Result = (Index * ElSize) % 128 == 0;
+
+ return Result;
+}
+
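Both predicates above perform the same alignment test; a standalone sketch with a worked value (in the real code the element size comes from the node's value type):

// Sketch only, not part of the patch: an extract/insert index is usable for
// VEXTRACTF128/VINSERTF128 only if it lands on a 128-bit lane boundary.
static bool indexIsLaneAligned(unsigned ElemIndex, unsigned EltSizeInBits) {
  return (ElemIndex * EltSizeInBits) % 128 == 0;
}
// e.g. a v8i32 source: index 4 -> 4*32 == 128, aligned; index 2 -> 64, rejected.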
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
@@ -3388,6 +3517,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
return (Val - i) * EltSize;
}
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+ llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+ EVT VecVT = N->getOperand(0).getValueType();
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+ if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+ llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+ uint64_t Index =
+ cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+ EVT VecVT = N->getValueType(0);
+ EVT ElVT = VecVT.getVectorElementType();
+
+ unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+ return Index / NumElemsPerChunk;
+}
+
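Both helpers above reduce to the same arithmetic; a small sketch with a worked example (assuming, as in the code, 128-bit lanes):

// Sketch only, not part of the patch: the VEXTRACTF128/VINSERTF128 immediate
// is the 128-bit lane number that the element index falls into.
static unsigned laneImmediateForIndex(unsigned ElemIndex, unsigned EltSizeInBits) {
  unsigned NumElemsPerChunk = 128 / EltSizeInBits;  // elements per 128-bit lane
  return ElemIndex / NumElemsPerChunk;
}
// e.g. extracting a v4f32 from a v8f32 at element index 4:
//   NumElemsPerChunk = 128/32 = 4, immediate = 4/4 = 1 (upper lane).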
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -3537,13 +3702,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
- if (VT.getSizeInBits() == 64) { // MMX
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (VT.getSizeInBits() == 128) {
+ if (VT.getSizeInBits() == 128) { // SSE
if (HasSSE2) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -3559,7 +3721,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
@@ -3571,11 +3733,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
@@ -3640,9 +3799,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- if (SV->getValueType(0).getVectorNumElements() <= 4)
- return SDValue(SV, 0);
-
EVT PVT = MVT::v4f32;
EVT VT = SV->getValueType(0);
DebugLoc dl = SV->getDebugLoc();
@@ -3663,9 +3819,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Perform the splat.
int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V1);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3789,7 +3945,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
}
// Actual nodes that may contain scalar elements
- if (Opcode == ISD::BIT_CONVERT) {
+ if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
@@ -3978,7 +4134,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4017,11 +4173,10 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI, DebugLoc dl) {
- bool isMMX = VT.getSizeInBits() == 64;
- EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
- SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
@@ -4029,7 +4184,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const {
-
+
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
// the shuffle mask.
@@ -4046,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
@@ -4084,41 +4238,42 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
int EltNo = (Offset - StartOffset) >> 2;
int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
- SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(StartOffset),
false, false, 0);
// Canonicalize it to a v4i32 shuffle.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getVectorShuffle(MVT::v4i32, dl, V1,
- DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ DAG.getUNDEF(MVT::v4i32),&Mask[0]));
}
return SDValue();
}
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
-/// vector of type 'VT', see if the elements can be replaced by a single large
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-///
+///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- DebugLoc &dl, SelectionDAG &DAG) {
+ DebugLoc &DL, SelectionDAG &DAG) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
-
+
LoadSDNode *LDBase = NULL;
unsigned LastLoadedElt = -1U;
-
+
// For each element in the initializer, see if we've found a load or an undef.
- // If we don't find an initial load element, or later load elements are
+ // If we don't find an initial load element, or later load elements are
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
-
+
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
@@ -4143,18 +4298,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
- LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+ Ops, 2, MVT::i32,
+ LDBase->getMemOperand());
+ return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
}
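A reduced sketch of the consecutiveness test this routine relies on, phrased over plain byte offsets rather than SDNodes (the names and the undef encoding below are illustrative only):

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only, not part of the patch. Element i must either be undef
// (encoded here as a negative offset) or load from Base + i*EltBytes for the
// whole build_vector to be replaceable by one wide load.
static bool loadsAreConsecutive(const std::vector<std::int64_t> &OffsetsFromBase,
                                std::int64_t EltBytes) {
  for (std::size_t i = 0; i < OffsetsFromBase.size(); ++i) {
    if (OffsetsFromBase[i] < 0)                       // undef element
      continue;
    if (OffsetsFromBase[i] != static_cast<std::int64_t>(i) * EltBytes)
      return false;                                   // non-consecutive load
  }
  return true;
}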
@@ -4162,6 +4319,35 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
+
+ EVT VT = Op.getValueType();
+ EVT ExtVT = VT.getVectorElementType();
+
+ unsigned NumElems = Op.getNumOperands();
+
+ // For AVX-length vectors, build the individual 128-bit pieces and
+ // use shuffles to put them in place.
+ if (VT.getSizeInBits() > 256 &&
+ Subtarget->hasAVX() &&
+ !Disable256Bit &&
+ !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ V[i] = Op.getOperand(i);
+ }
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+ // Build the lower subvector.
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+ // Build the upper subvector.
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+ NumElems/2);
+
+ return ConcatVectors(Lower, Upper, DAG);
+ }
+
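The split above is purely positional; a sketch of the operand partition, assuming an even operand count as the 256-bit types here guarantee:

#include <utility>
#include <vector>

// Sketch only, not part of the patch: the first NumElems/2 scalars form the
// lower 128-bit half and the rest form the upper half; ConcatVectors (a
// helper defined elsewhere in this file) then reassembles the 256-bit value.
template <typename T>
static std::pair<std::vector<T>, std::vector<T>>
splitBuildVectorOperands(const std::vector<T> &Ops) {
  const std::size_t Half = Ops.size() / 2;
  return { std::vector<T>(Ops.begin(), Ops.begin() + Half),
           std::vector<T>(Ops.begin() + Half, Ops.end()) };
}
// e.g. a v8f32 build_vector of a..h yields v4f32 {a,b,c,d} and v4f32 {e,f,g,h}.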
// All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
// All one's are handled with pcmpeqd. In AVX, zero's are handled with
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
@@ -4169,10 +4355,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
(Op.getValueType().getSizeInBits() != 256 &&
ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // Canonicalize this to <4 x i32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ if (Op.getValueType() == MVT::v4i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -4180,11 +4366,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
}
- EVT VT = Op.getValueType();
- EVT ExtVT = VT.getVectorElementType();
unsigned EVTBits = ExtVT.getSizeInBits();
- unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
@@ -4223,9 +4406,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
- // Handle MMX and SSE both.
- EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
- unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+ // Handle SSE only.
+ assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+ EVT VecVT = MVT::v4i32;
+ unsigned VecElts = 4;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -4245,7 +4429,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
}
}
@@ -4264,11 +4448,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ EVT MiddleVT = MVT::v4i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -4394,20 +4579,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
-
+
// Check for elements which are consecutive loads.
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
if (LD.getNode())
return LD;
-
- // For SSE 4.1, use insertps to put the high elements into the low element.
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
-
+
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
@@ -4415,7 +4600,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
return Result;
}
-
+
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
@@ -4441,7 +4626,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
EltStride == NumElems/2)
continue;
-
+
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
}
EltStride >>= 1;
@@ -4461,21 +4646,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
int Mask[2];
- SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+ SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
InVec = Op.getOperand(1);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
unsigned NumElts = ResVT.getVectorNumElements();
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
} else {
- InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+ InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
Mask[0] = 0; Mask[1] = 2;
VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
}
// v8i16 shuffles - Prefer shuffles in the following order:
@@ -4557,9 +4742,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
- NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
// Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
// source words for the shuffle, to aid later transformations.
@@ -4628,12 +4813,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
if (!TwoInputs)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
// Calculate the shuffle mask for the second input, shuffle it, and
// OR it with the first shuffled input.
@@ -4648,12 +4833,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
}
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::v16i8, &pshufbMask[0], 16));
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
}
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
@@ -4820,8 +5005,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// No SSSE3 - Calculate in place words and then fix all out of place words
// With 0-16 extracts & inserts. Worst case is 16 bytes out of order from
// the 16 different words that comprise the two doublequadword input vectors.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
SDValue NewV = V2Only ? V2 : V1;
for (int i = 0; i != 8; ++i) {
int Elt0 = MaskVals[i*2];
@@ -4883,25 +5068,23 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
DAG.getIntPtrConstant(i));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const TargetLowering &TLI, DebugLoc dl) {
+ SelectionDAG &DAG, DebugLoc dl) {
EVT VT = SVOp->getValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
- EVT NewVT = MaskVT;
+ EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
@@ -4910,12 +5093,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
case MVT::v16i8: NewVT = MVT::v4i32; break;
}
- if (NewWidth == 2) {
- if (VT.isInteger())
- NewVT = MVT::v2i64;
- else
- NewVT = MVT::v2f64;
- }
int Scale = NumElems / NewWidth;
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
@@ -4935,8 +5112,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
MaskVec.push_back(StartIdx / Scale);
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
@@ -4953,13 +5130,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
OpVT,
@@ -4969,9 +5146,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
}
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
OpVT, SrcOp)));
}
@@ -5125,7 +5302,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
static bool MayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
@@ -5134,6 +5311,110 @@ static bool MayFoldVectorLoad(SDValue V) {
return false;
}
+// FIXME: the version above should always be used. Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use. Remove this version after this bug gets fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (ISD::isNormalLoad(V.getNode()))
+ return true;
+ return false;
+}
+
+/// CanXFormVExtractWithShuffleIntoLoad - Check if the current shuffle is used
+/// by a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization is not done.
+/// FIXME: This is probably not the best approach, but it fixes the problem
+/// until the right path is decided.
+static
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+ ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
+
+ // Be sure that the vector shuffle is present in a pattern like this:
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+ if (!V.hasOneUse())
+ return false;
+
+ SDNode *N = *V.getNode()->use_begin();
+ if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ SDValue EltNo = N->getOperand(1);
+ if (!isa<ConstantSDNode>(EltNo))
+ return false;
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ bool HasShuffleIntoBitcast = false;
+ if (V.getOpcode() == ISD::BITCAST) {
+ EVT SrcVT = V.getOperand(0).getValueType();
+ if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+ return false;
+ V = V.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+  // Select the input vector, guarding against an out-of-range vector extract.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+ V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
+
+ // Skip one more bit_convert if necessary
+ if (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (ISD::isNormalLoad(V.getNode())) {
+ // Is the original load suitable?
+ LoadSDNode *LN0 = cast<LoadSDNode>(V);
+
+    // FIXME: avoid the multi-use bug that is preventing lots of
+    // foldings from being detected; this is still wrong of course, but
+    // it gives the temporarily desired behavior. If it happens that
+    // the load really has more uses, during isel it will not fold, and
+    // will generate poor code.
+ if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+ return false;
+
+ if (!HasShuffleIntoBitcast)
+ return true;
+
+ // If there's a bitcast before the shuffle, check if the load type and
+    // alignment are valid.
+ unsigned Align = LN0->getAlignment();
+ unsigned NewAlign =
+ TLI.getTargetData()->getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return false;
+ }
+
+ return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+
+  // Canonicalize to v2f64.
+ V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+ V1, DAG));
+}
+
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
@@ -5191,6 +5472,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
+  // They can't both be memory operations, though.
+ if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+ CanFoldLoad = false;
+
if (CanFoldLoad) {
if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -5228,7 +5513,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
- llvm_unreachable("Unknow type for unpckl");
+ llvm_unreachable("Unknown type for unpckl");
}
return 0;
}
@@ -5242,63 +5527,111 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v8i16: return X86ISD::PUNPCKHWD;
default:
- llvm_unreachable("Unknow type for unpckh");
+ llvm_unreachable("Unknown type for unpckh");
}
return 0;
}
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const X86Subtarget *Subtarget) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
- bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
- bool V1IsSplat = false;
- bool V2IsSplat = false;
- bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
- // Promote splats to v4f32.
+ // Handle splat operations
if (SVOp->isSplat()) {
- if (isMMX || NumElems < 4)
+ // Special case, this is the only place now where it's
+ // allowed to return a vector_shuffle operation without
+ // using a target specific node, because *hopefully* it
+ // will be optimized away by the dag combiner.
+ if (VT.getVectorNumElements() <= 4 &&
+ CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
return Op;
+
+ // Handle splats by matching through known masks
+ if (VT.getVectorNumElements() <= 4)
+ return SDValue();
+
+    // Canonicalize all of the remaining splats to v4f32.
return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode())
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerVECTOR_SHUFFLE(NewOp, DAG));
+ return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode()) {
if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
- SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
DAG, Subtarget, dl);
}
}
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+  // Shuffle operations on MMX are not supported.
+ if (isMMX)
+ return Op;
+
+ // Vector shuffle lowering takes 3 steps:
+ //
+ // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled.
+ // 2) Matching of shuffles with known shuffle masks to x86 target specific
+ // shuffle nodes.
+ // 3) Rewriting of unmatched masks into new generic shuffle operations,
+ // so the shuffle can be broken into other shuffles and the legalizer can
+ // try the lowering again.
+ //
+  // The general idea is that no vector_shuffle operation should be left to
+  // be matched during isel; all of them must be converted to a target specific
+  // node here.
+
+ // Normalize the input vectors. Here splats, zeroed vectors, profitable
+ // narrowing and commutation of operands should be handled. The actual code
+  // doesn't include all of those; work in progress...
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ if (NewOp.getNode())
+ return NewOp;
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
@@ -5309,6 +5642,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef &&
+ RelaxedMayFoldVectorLoad(V1))
+ return getMOVDDup(Op, dl, V1, DAG);
+
+ if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+  // Used to match splats
+ if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
// during isel it can match several different instructions, not only pshufd
@@ -5349,7 +5694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (!X86::isMOVLPMask(SVOp)) {
if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
@@ -5359,22 +5704,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (!isMMX) {
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
- if (X86::isMOVHLPSMask(SVOp))
- return getMOVHighToLow(Op, dl, DAG);
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
- if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasSSE2);
- }
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -5414,13 +5757,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKLMask(SVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp))
- return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ if (X86::isUNPCKHMask(SVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -5443,19 +5784,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKLMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
- if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp))
- return (isMMX) ?
- NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ if (X86::isUNPCKHMask(NewSVOp))
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
- // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
-
// Normalize the node to match x86 shuffle ops if needed
- if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -5464,15 +5801,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
SmallVector<int, 16> M;
SVOp->getMask(M);
- // Very little shuffling can be done for 64-bit vectors right now.
- if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+ if (isPALIGNRMask(M, VT, HasSSSE3))
+ return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
+ X86::getShufflePALIGNRImmediate(SVOp),
+ DAG);
- // FIXME: pshufb, blends, shifts.
- if (VT.getVectorNumElements() == 2 ||
- ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
- return Op;
+ if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
+ SVOp->getSplatIndex() == 0 && V2IsUndef) {
+ if (VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ }
if (isPSHUFHWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
@@ -5494,6 +5834,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
TargetMask, DAG);
}
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
@@ -5507,8 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return NewOp;
}
- // Handle all 4 wide cases with a number of shuffles except for MMX.
- if (NumElems == 4 && !isMMX)
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4)
return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
return SDValue();
@@ -5531,7 +5878,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
@@ -5552,14 +5899,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
if ((User->getOpcode() != ISD::STORE ||
(isa<ConstantSDNode>(Op.getOperand(1)) &&
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
- (User->getOpcode() != ISD::BIT_CONVERT ||
+ (User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
} else if (VT == MVT::i32) {
// ExtractPS works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
@@ -5575,6 +5922,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+
+ // If this is a 256-bit vector result, first extract the 128-bit
+ // vector and then extract from the 128-bit vector.
+ if (VecVT.getSizeInBits() > 128) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ unsigned NumElems = VecVT.getVectorNumElements();
+ SDValue Idx = Op.getOperand(1);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128);
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ // Get the 128-bit vector.
+ bool Upper = IdxVal >= ExtractNumElems;
+ Vec = Extract128BitVector(Vec, Idx, DAG, dl);
+
+ // Extract from it.
+ SDValue ScaledIdx = Idx;
+ if (Upper)
+ ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(ExtractNumElems,
+ Idx.getValueType()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
+ ScaledIdx);
+ }
+
+ assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
+
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
@@ -5590,7 +5969,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, dl,
+ DAG.getNode(ISD::BITCAST, dl,
MVT::v4i32, Vec),
Op.getOperand(1)));
   // Transform it so it matches pextrw which produces a 32-bit result.
@@ -5650,8 +6029,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
unsigned Opc;
if (VT == MVT::v8i16)
Opc = X86ISD::PINSRW;
- else if (VT == MVT::v4i16)
- Opc = X86ISD::MMX_PINSRW;
else if (VT == MVT::v16i8)
Opc = X86ISD::PINSRB;
else
@@ -5689,17 +6066,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (VT.getSizeInBits() > 128) {
+ if (!isa<ConstantSDNode>(N2))
+ return SDValue();
+
+ // Get the 128-bit vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue();
+ bool Upper = IdxVal >= NumElems / 2;
+
+ SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl);
+
+ // Insert into it.
+ SDValue ScaledN2 = N2;
+ if (Upper)
+ ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
+ DAG.getConstant(NumElems /
+ (VT.getSizeInBits() / 128),
+ N2.getValueType()));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
+ N1, ScaledN2);
+
+ // Insert the 128-bit vector
+ // FIXME: Why UNDEF?
+ return Insert128BitVector(N0, Op, N2, DAG, dl);
+ }
+
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
return SDValue();
- DebugLoc dl = Op.getDebugLoc();
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- SDValue N2 = Op.getOperand(2);
-
if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
@@ -5707,31 +6112,79 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
- dl, VT, N0, N1, N2);
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
-
+ EVT OpVT = Op.getValueType();
+
+ // If this is a 256-bit vector result, first insert into a 128-bit
+ // vector and then insert into the 256-bit vector.
+ if (OpVT.getSizeInBits() > 128) {
+ // Insert into a 128-bit vector.
+ EVT VT128 = EVT::getVectorVT(*Context,
+ OpVT.getVectorElementType(),
+ OpVT.getVectorNumElements() / 2);
+
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
+
+ // Insert the 128-bit vector.
+ return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op,
+ DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ }
+
if (Op.getValueType() == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- EVT VT = MVT::v2i32;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: break;
- case MVT::v16i8:
- case MVT::v8i16:
- VT = MVT::v4i32;
- break;
+ assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 &&
+ "Expected an SSE type!");
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
+}
+
+// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in
+// a simple subregister reference or explicit instructions to grab
+// upper bits of a vector.
+SDValue
+X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue Idx = Op.getNode()->getOperand(1);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 128
+ && Vec.getNode()->getValueType(0).getSizeInBits() == 256) {
+ return Extract128BitVector(Vec, Idx, DAG, dl);
+ }
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+ return SDValue();
+}
+
+// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
+// simple superregister reference or explicit instructions to insert
+// the upper bits of a vector.
+SDValue
+X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasAVX()) {
+ DebugLoc dl = Op.getNode()->getDebugLoc();
+ SDValue Vec = Op.getNode()->getOperand(0);
+ SDValue SubVec = Op.getNode()->getOperand(1);
+ SDValue Idx = Op.getNode()->getOperand(2);
+
+ if (Op.getNode()->getValueType(0).getSizeInBits() == 256
+ && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) {
+ return Insert128BitVector(Vec, SubVec, Idx, DAG, dl);
+ }
+ }
+ return SDValue();
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -5797,12 +6250,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (OpFlag) {
+ if (OpFlag)
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Result);
- }
return Result;
}
@@ -5906,7 +6358,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -5929,7 +6381,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
@@ -5978,14 +6430,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
- // Get the Thread Pointer
- SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
- DebugLoc(), PtrVT,
- DAG.getRegister(is64Bit? X86::FS : X86::GS,
- MVT::i32));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
- NULL, 0, false, false, 0);
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
+ is64Bit ? 257 : 256));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -6004,14 +6456,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- PseudoSourceValue::getGOT(), 0, false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -6020,29 +6472,29 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model
+
+ TLSModel::Model model
= getTLSModel(GV, getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
@@ -6053,7 +6505,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
@@ -6062,24 +6514,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
- DebugLoc DL = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
- getPointerTy(),
+ GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Offset);
-
+
// Lowering the machine isd will make sure everything is in the right
// location.
- SDValue Args[] = { Offset };
- SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Args[] = { Chain, Offset };
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
@@ -6089,7 +6543,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
-
+
assert(false &&
"TLS not implemented for this target.");
@@ -6148,12 +6602,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
- if (SrcVT.isVector()) {
- if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (SrcVT.isVector())
return SDValue();
- }
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -6174,25 +6624,36 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
- Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+
+ unsigned ByteSize = SrcVT.getSizeInBits()/8;
+
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
+
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
- SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
- Tys, Ops, array_lengthof(Ops));
+ SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
+ X86ISD::FILD, DL,
+ Tys, Ops, array_lengthof(Ops),
+ SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
@@ -6202,15 +6663,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
- Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
+
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
+ Ops, array_lengthof(Ops),
+ Op.getValueType(), MMO);
+ Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
}
@@ -6284,12 +6753,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
DAG.getIntPtrConstant(0)));
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
- SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
@@ -6317,19 +6786,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
DAG.getIntPtrConstant(0)));
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
DAG.getIntPtrConstant(0));
// Or the load with the bias.
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Load)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, Bias)));
Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or),
DAG.getIntPtrConstant(0));
// Subtract the bias.
@@ -6374,24 +6843,34 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
+ OffsetSlot, MachinePointerInfo(),
+ false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, NULL, 0, false, false, 0);
+ StackSlot, MachinePointerInfo(),
+ false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
   // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
+ int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction()
+ .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
- SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
+ MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
@@ -6414,9 +6893,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
- FudgePtr, PseudoSourceValue::getConstantPool(),
- 0, MVT::f32, false, false, 4);
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
@@ -6424,7 +6903,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
@@ -6453,6 +6932,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
@@ -6463,37 +6944,43 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+ EVT TheVT = Op.getOperand(0).getValueType();
+ if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, dl, Value, StackSlot,
- PseudoSourceValue::getFixedStack(SSFI), 0,
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
- Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ Chain, StackSlot, DAG.getValueType(TheVT)
};
- Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
+ DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector()) {
- if (Op.getValueType() == MVT::v2i32 &&
- Op.getOperand(0).getValueType() == MVT::v2f64) {
- return Op;
- }
+ if (Op.getValueType().isVector())
return SDValue();
- }
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -6502,7 +6989,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
@@ -6513,7 +7000,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
// Load the result.
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, NULL, 0, false, false, 0);
+ FIST, StackSlot, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -6539,7 +7026,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -6566,14 +7053,14 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
if (VT.isVector()) {
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
Op.getOperand(0)),
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -6615,7 +7102,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
@@ -6625,7 +7112,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
}
@@ -6644,7 +7131,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
@@ -6884,8 +7371,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND &&
- Op0.hasOneUse() &&
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
@@ -6894,19 +7380,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return NewSetCC;
}
- // Look for "(setcc) == / != 1" to avoid unncessary setcc.
- if (Op0.getOpcode() == X86ISD::SETCC &&
- Op1.getOpcode() == ISD::Constant &&
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
+ // these.
+ if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
- if (Invert)
+
+ // If the input is a setcc, then reuse the input setcc or use a new one with
+ // the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (!Invert) return Op0;
+
CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
}
bool isFP = Op1.getValueType().isFloatingPoint();
@@ -6914,17 +7406,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (X86CC == X86::COND_INVALID)
return SDValue();
- SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
-
- // Use sbb x, x to materialize carry bit into a GPR.
- if (X86CC == X86::COND_B)
- return DAG.getNode(ISD::AND, dl, MVT::i8,
- DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond),
- DAG.getConstant(1, MVT::i8));
-
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
}
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -6996,11 +7480,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
switch (VT.getSimpleVT().SimpleTy) {
default: break;
- case MVT::v8i8:
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::v4i16:
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::v2i32:
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
@@ -7051,6 +7532,8 @@ static bool isX86LogicalCmp(SDValue Op) {
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD ||
Opc == X86ISD::SUB ||
+ Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB ||
Opc == X86ISD::SMUL ||
Opc == X86ISD::UMUL ||
Opc == X86ISD::INC ||
@@ -7060,13 +7543,28 @@ static bool isX86LogicalCmp(SDValue Op) {
Opc == X86ISD::AND))
return true;
+ if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
+ return true;
+
return false;
}
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
- DebugLoc dl = Op.getDebugLoc();
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
SDValue CC;
if (Cond.getOpcode() == ISD::SETCC) {
@@ -7075,34 +7573,44 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = NewCond;
}
- // (select (x == 0), -1, 0) -> (sign_bit (x - 1))
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
+ // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
+ // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
+ // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
- cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) {
+ Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
+ isZero(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
- if (Cmp.getOpcode() == X86ISD::CMP) {
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1);
+
+ unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+
+ if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ (CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
+ SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue Res = // Res = 0 or -1.
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
+
+ if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ Res = DAG.getNOT(DL, Res, Res.getValueType());
+
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- ConstantSDNode *RHSC =
- dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode());
- if (N1C && N1C->isAllOnesValue() &&
- N2C && N2C->isNullValue() &&
- RHSC && RHSC->isNullValue()) {
- SDValue CmpOp0 = Cmp.getOperand(0);
- Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
- CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
- return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
- DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
- }
+ if (N2C == 0 || !N2C->isNullValue())
+ Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
+ return Res;
}
}
- // Look pass (and (setcc_carry (cmp ...)), 1).
+ // Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
@@ -7135,8 +7643,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
@@ -7150,11 +7658,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = EmitTest(Cond, X86::COND_NE, DAG);
}
+ // a < b ? -1 : 0 -> RES = ~setcc_carry
+ // a < b ? 0 : -1 -> RES = setcc_carry
+ // a >= b ? -1 : 0 -> RES = setcc_carry
+ // a >= b ? 0 : -1 -> RES = ~setcc_carry
+ if (Cond.getOpcode() == X86ISD::CMP) {
+ unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
+
+ if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
+ (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
+ DAG.getConstant(X86::COND_B, MVT::i8), Cond);
+ if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ return DAG.getNOT(DL, Res, Res.getValueType());
+ return Res;
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
@@ -7209,7 +7734,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
@@ -7310,7 +7835,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
@@ -7337,8 +7862,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetCygMing() &&
- "This should be used only on Cygwin/Mingw targets");
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -7353,9 +7878,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
@@ -7369,15 +7894,15 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- if (!Subtarget->is64Bit()) {
+ if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
// __va_list_tag:
@@ -7388,48 +7913,107 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue, 8> MemOps;
SDValue FIN = Op.getOperand(1);
// Store gp_offset
- SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+ SDValue Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsGPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, MachinePointerInfo(SV), false, false, 0);
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
- Store = DAG.getStore(Op.getOperand(0), dl,
+ Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 4, false, false, 0);
+ FIN, MachinePointerInfo(SV, 4), false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
+ Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
+ MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
- false, false, 0);
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
+ MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOps[0], MemOps.size());
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
- assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
- report_fatal_error("VAArgInst is not yet implemented for x86-64!");
- return SDValue();
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasXMM());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
}
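The lowering above keys off the SysV AMD64 va_list layout that the custom inserter further down in this patch documents (gp_offset, fp_offset, overflow_arg_area, reg_save_area). As a rough host-side illustration of the same bookkeeping, and not code taken from this patch, fetching the next integer argument (the ArgMode == 1 case) could look like:

#include <cstdint>
#include <cstring>

// Mirrors the struct in the va_list comment below: 24 bytes, 8-byte aligned.
struct AMD64VaList {
  uint32_t gp_offset;          // next GPR slot in reg_save_area (0..48)
  uint32_t fp_offset;          // next XMM slot in reg_save_area (48..176)
  void    *overflow_arg_area;  // stack area for arguments that did not fit
  void    *reg_save_area;      // 6 GPRs (8 bytes each) + 8 XMMs (16 bytes each)
};

// ArgMode == 1 in LowerVAARG: pull an i64 from the GPR save area if room is
// left, otherwise from the overflow area (which is what offsetMBB/overflowMBB
// do at the machine-instruction level).
static inline int64_t next_int_arg(AMD64VaList &VL) {
  int64_t V;
  if (VL.gp_offset + 8 <= 6 * 8) {
    std::memcpy(&V, static_cast<char *>(VL.reg_save_area) + VL.gp_offset, 8);
    VL.gp_offset += 8;
  } else {
    std::memcpy(&V, VL.overflow_arg_area, 8);
    VL.overflow_arg_area = static_cast<char *>(VL.overflow_arg_area) + 8;
  }
  return V;
}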
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -7440,11 +8024,12 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcPtr = Op.getOperand(2);
const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
- return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false, DstSV, 0, SrcSV, 0);
+ false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
SDValue
@@ -7713,10 +8298,11 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
} else {
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+// FIXME this must be lowered to get rid of the invalid type.
}
EVT VT = Op.getValueType();
- ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(NewIntNo, MVT::i32),
Op.getOperand(1), ShAmt);
@@ -7740,13 +8326,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, NULL, 0, false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -7759,7 +8345,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
@@ -7784,7 +8371,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
- Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
MF.getRegInfo().addLiveOut(StoreAddrReg);
@@ -7819,11 +8407,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 0, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
- OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
// Load the 'nest' parameter value into R10.
@@ -7832,11 +8422,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 10, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 10),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
- OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
// Jump to the nested function.
@@ -7844,13 +8436,15 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
- Addr, TrmpAddr, 20, false, false, 0);
+ Addr, MachinePointerInfo(TrmpAddr, 20),
+ false, false, 0);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
- TrmpAddr, 22, false, false, 0);
+ MachinePointerInfo(TrmpAddr, 22),
+ false, false, 0);
SDValue Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
@@ -7912,22 +8506,26 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpAddr, 0, false, false, 0);
+ Trmp, MachinePointerInfo(TrmpAddr),
+ false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpAddr, 5, false, false, 1);
+ MachinePointerInfo(TrmpAddr, 5),
+ false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
+ MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
SDValue Ops[] =
@@ -7959,44 +8557,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
- SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
- DAG.getEntryNode(), StackSlot);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
+ MachineMemOperand::MOStore, 2, 2);
+
+ SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, 2, MVT::i16, MMO);
// Load FP Control Word from stack slot
- SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// Transform as necessary
SDValue CWD1 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x800, MVT::i16)),
DAG.getConstant(11, MVT::i8));
SDValue CWD2 =
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, DL, MVT::i16,
+ DAG.getNode(ISD::AND, DL, MVT::i16,
CWD, DAG.getConstant(0x400, MVT::i16)),
DAG.getConstant(9, MVT::i8));
SDValue RetVal =
- DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+ DAG.getNode(ISD::AND, DL, MVT::i16,
+ DAG.getNode(ISD::ADD, DL, MVT::i16,
+ DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
DAG.getConstant(1, MVT::i16)),
DAG.getConstant(3, MVT::i16));
return DAG.getNode((VT.getSizeInBits() < 16 ?
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
@@ -8122,16 +8727,16 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
+
std::vector<Constant*> CV(4, CI);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
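For reference, the v4i32 path above builds 2^c per lane by adding the shift amount, pre-shifted into the exponent field, to 0x3f800000 (1.0f) and converting back to an integer. A scalar sketch of the same identity, illustrative only and not the DAG API:

#include <cstdint>
#include <cstring>

// (c << 23) + 0x3f800000 reinterpreted as float is exactly 2.0f^c, so a
// variable left shift becomes an integer multiply, which SSE2 can do per lane.
static inline uint32_t shl_via_exponent(uint32_t x, uint32_t c) { // c in [0,31]
  uint32_t bits = (c << 23) + 0x3f800000u;  // exponent = 127 + c, mantissa = 0
  float pow2c;
  std::memcpy(&pow2c, &bits, sizeof pow2c);
  return x * static_cast<uint32_t>(pow2c);  // wraps mod 2^32, same as x << c
}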
@@ -8149,7 +8754,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
Constant *C = ConstantVector::get(CVM1);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 16);
// r = pblendv(r, psllw(r & (char16)15, 4), a);
@@ -8157,31 +8762,27 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
C = ConstantVector::get(CVM2);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false, 16);
-
+ MachinePointerInfo::getConstantPool(),
+ false, false, 16);
+
// r = pblendv(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, M);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
- R, M, Op);
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
// return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R = DAG.getNode(X86ISD::PBLENDVB, dl, VT,
R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
return R;
}
@@ -8198,8 +8799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
unsigned Cond = 0;
- DebugLoc dl = Op.getDebugLoc();
-
+ DebugLoc DL = Op.getDebugLoc();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
@@ -8238,19 +8838,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
- case ISD::UMULO:
- BaseOp = X86ISD::UMUL;
- Cond = X86::COND_B;
- break;
+ case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
+ MVT::i32);
+ SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(X86::COND_O, MVT::i32),
+ SDValue(Sum.getNode(), 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+ }
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+ SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
- DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+ DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32),
+ SDValue(Sum.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
return Sum;
@@ -8258,10 +8868,10 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
-
+
if (!Subtarget->hasSSE2()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
+ SDValue Zero = DAG.getConstant(0,
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
@@ -8272,37 +8882,37 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
Zero,
Chain
};
- SDNode *Res =
+ SDNode *Res =
DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
array_lengthof(Ops));
return SDValue(Res, 0);
}
-
+
unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
if (!isDev)
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
+
unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
+
// def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
if (!Op1 && !Op2 && !Op3 && Op4)
return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
+
// def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
if (Op1 && !Op2 && !Op3 && !Op4)
return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
// (MFENCE)>;
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
@@ -8316,24 +8926,26 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
Reg = X86::RAX; size = 8;
break;
}
- SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
DAG.getTargetConstant(size, MVT::i8),
cpIn.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
+ Ops, 5, T, MMO);
SDValue cpOut =
- DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+ DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
}
SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() && "Result not type legalized?");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
@@ -8349,16 +8961,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
-SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
- Subtarget->hasMMX() && !DisableMMX) &&
- "Unexpected custom BIT_CONVERT");
- assert((DstVT == MVT::i64 ||
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && "Unexpected custom BITCAST");
+ assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
- "Unexpected custom BIT_CONVERT");
+ "Unexpected custom BITCAST");
// i64 <=> MMX conversions are Legal.
if (SrcVT==MVT::i64 && DstVT.isVector())
return Op;
@@ -8370,6 +8981,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
// All other conversions need to be expanded.
return SDValue();
}
+
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
@@ -8384,6 +8996,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
cast<AtomicSDNode>(Node)->getAlignment());
}
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getNode()->getValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Invalid code");
+ case ISD::ADDC: Opc = X86ISD::ADD; break;
+ case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
+ case ISD::SUBC: Opc = X86ISD::SUB; break;
+ case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break;
+ }
+
+ if (!ExtraOp)
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
+ Op.getOperand(1), Op.getOperand(2));
+}
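The new hook maps the generic carry-chain nodes onto EFLAGS-producing X86 nodes: ADDC/SUBC produce the carry, ADDE/SUBE additionally consume it. In plain C++ terms (a sketch of the arithmetic, not the SelectionDAG API), a two-word add is exactly that pairing:

#include <cstdint>

struct U128 { uint64_t lo, hi; };

static inline U128 add128(U128 a, U128 b) {
  U128 r;
  r.lo = a.lo + b.lo;                     // ADDC -> X86ISD::ADD (sets carry)
  uint64_t carry = (r.lo < a.lo) ? 1 : 0; // the EFLAGS carry bit
  r.hi = a.hi + b.hi + carry;             // ADDE -> X86ISD::ADC (uses carry)
  return r;
}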
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -8397,6 +9035,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@@ -8441,7 +9081,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
- case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
}
}
@@ -8478,6 +9122,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ // We don't want to expand or promote these.
+ return;
case ISD::FP_TO_SINT: {
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, true);
@@ -8485,13 +9135,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
- false, false, 0));
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(), false, false, 0));
}
return;
}
case ISD::READCYCLECOUNTER: {
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
@@ -8527,8 +9177,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
MVT::i32, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
@@ -8601,15 +9253,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+ case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+ case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
- case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
@@ -8637,6 +9292,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::ADC: return "X86ISD::ADC";
+ case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
@@ -8681,7 +9338,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
- case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
}
}
@@ -9203,15 +9861,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
-
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
unsigned Opc;
-
if (!Subtarget->hasAVX()) {
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
@@ -9224,24 +9879,318 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
-
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
for (unsigned i = 0; i < numArgs; ++i) {
MachineOperand &Op = MI->getOperand(i+1);
-
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
-
- BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+ BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
+ return BB;
+}
+MachineBasicBlock *
+X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // Address into RAX/EAX, other two args into ECX, EDX.
+ unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ unsigned ValOps = X86::AddrNumOperands;
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(ValOps).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
+ .addReg(MI->getOperand(ValOps+1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // First arg in ECX, the second in EAX.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
+ .addReg(MI->getOperand(0).getReg());
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
+ .addReg(MI->getOperand(1).getReg());
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
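The two pseudo expansions above only shuffle operands into the fixed registers the hardware expects: MONITOR takes the address in RAX/EAX and the extension/hint words in ECX/EDX, MWAIT takes them in ECX/EAX. At the source level this corresponds to the SSE3 intrinsics; the spin-loop shape below is an illustrative usage sketch, not code from this patch:

#include <pmmintrin.h>

// Arm the monitor on *flag, then sleep until a write (or other wake event).
static void wait_for_flag(volatile int *flag) {
  while (*flag == 0) {
    _mm_monitor((const void *)flag, /*extensions=*/0, /*hints=*/0);
    if (*flag == 0)        // re-check to avoid sleeping past a racing write
      _mm_mwait(/*extensions=*/0, /*hints=*/0);
  }
}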
MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+    //       thisMBB
+    //          |     .
+    //          |        .
+    //     offsetMBB   overflowMBB
+    //          |        .
+    //          |     .
+    //         endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
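The overflow-area rounding used in the custom inserter, (addr + (align-1)) & ~(align-1), is the usual power-of-two round-up. As a standalone sketch (illustrative, not lifted from the patch):

#include <cassert>
#include <cstdint>

static inline uint64_t align_up(uint64_t addr, uint64_t align) {
  assert((align & (align - 1)) == 0 && "Alignment must be a power of 2");
  return (addr + (align - 1)) & ~(align - 1);
}
// e.g. align_up(0x1001, 16) == 0x1010 and align_up(0x1000, 16) == 0x1000.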
+
+MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
@@ -9296,8 +10245,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
- PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
- MachineMemOperand::MOStore, Offset,
+ MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
.addFrameIndex(RegSaveFrameIndex)
@@ -9389,7 +10338,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -9399,8 +10348,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
// FIXME: The code should be tweaked as soon as we try to do codegen for
// mingw-w64.
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
+ .addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
@@ -9418,30 +10370,30 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
- const X86InstrInfo *TII
+ const X86InstrInfo *TII
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
- bool IsWin64 = Subtarget->isTargetWin64();
-
+
+ assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
-
+
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
@@ -9451,13 +10403,13 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
}
-
+
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -9467,13 +10419,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
- case X86::MINGW_ALLOCA:
- return EmitLoweredMingwAlloca(MI, BB);
+ case X86::TAILJMPd64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64:
+ assert(!"TAILJMP64 would not be touched here.");
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ // The defs of TCRETURNxx64 include Win64's callee-saved registers as a subset.
+ // On AMD64, additional defs should be added before register allocation.
+ if (!Subtarget->isTargetWin64()) {
+ MI->addRegisterDefined(X86::RSI);
+ MI->addRegisterDefined(X86::RDI);
+ MI->addRegisterDefined(X86::XMM6);
+ MI->addRegisterDefined(X86::XMM7);
+ MI->addRegisterDefined(X86::XMM8);
+ MI->addRegisterDefined(X86::XMM9);
+ MI->addRegisterDefined(X86::XMM10);
+ MI->addRegisterDefined(X86::XMM11);
+ MI->addRegisterDefined(X86::XMM12);
+ MI->addRegisterDefined(X86::XMM13);
+ MI->addRegisterDefined(X86::XMM14);
+ MI->addRegisterDefined(X86::XMM15);
+ }
+ return BB;
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
- case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
@@ -9583,6 +10558,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
+ // Thread synchronization.
+ case X86::MONITOR:
+ return EmitMonitor(MI, BB);
+ case X86::MWAIT:
+ return EmitMwait(MI, BB);
+
// Atomic Lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
@@ -9747,6 +10728,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
false);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
}
}
@@ -9773,6 +10757,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::INC:
@@ -9791,6 +10777,16 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
}
}
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ if (Op.getOpcode() == X86ISD::SETCC_CARRY)
+ return Op.getValueType().getScalarType().getSizeInBits();
+
+ // Fallback case.
+ return 1;
+}
+
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
@@ -9811,13 +10807,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (VT.getSizeInBits() != 128)
return SDValue();
+ // Don't create instructions with illegal types after legalize types has run.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
+ return SDValue();
+
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
@@ -9877,8 +10878,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
- 0, false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9893,11 +10894,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
- OffsetVal, StackPtr);
+ StackPtr, OffsetVal);
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
- ScalarAddr, NULL, 0, false, false, 0);
+ ScalarAddr, MachinePointerInfo(),
+ false, false, 0);
// Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -10473,6 +11475,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // Want to form PANDN nodes, in the hopes of then easily combining them with
+ // OR and AND nodes to form PBLEND/PSIGN.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Check LHS for vnot
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+ // Check RHS for vnot
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
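PANDN computes ~a & b per element, so the combine above just recognizes (xor x, all-ones) feeding an AND. The OR combine below then treats or(and(m, x), pandn(m, y)) as a bitwise select on the mask. A scalar sketch of the two identities (just the bit logic, not SSE code):

#include <cstdint>

static inline uint64_t pandn(uint64_t m, uint64_t x) {
  return ~m & x;                          // what PANDN does, lane-wise
}

static inline uint64_t blend_bits(uint64_t m, uint64_t x, uint64_t y) {
  return (m & x) | pandn(m, y);           // or(and(m,x), pandn(m,y)) == select
}
// For any m, x: (m ^ ~0ull) & x == pandn(m, x).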
+
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -10480,12 +11512,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
return SDValue();
- // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+
+ // look for psign/blend
+ if (Subtarget->hasSSSE3()) {
+ if (VT == MVT::v2i64) {
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::PANDN)
+ std::swap(N0, N1);
+ // or (and (m, x), (pandn m, y))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and the PANDN; if not, bail.
+ if (!Y.getNode())
+ return SDValue();
+
+ // Validate that X, Y, and Mask are bitcasts, and see through them.
+ if (Mask.getOpcode() != ISD::BITCAST ||
+ X.getOpcode() != ISD::BITCAST ||
+ Y.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ // Look through mask bitcast.
+ Mask = Mask.getOperand(0);
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node. The sra node
+ // will be an intrinsic.
+ if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ break;
+ default: return SDValue();
+ }
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(2);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ X = X.getOperand(0);
+ Y = Y.getOperand(0);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+ unsigned Opc = 0;
+ switch (EltBits) {
+ case 8: Opc = X86ISD::PSIGNB; break;
+ case 16: Opc = X86ISD::PSIGNW; break;
+ case 32: Opc = X86ISD::PSIGND; break;
+ default: break;
+ }
+ if (Opc) {
+ SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+ }
+ }
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+ Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+ }
+ }
+ }
+
+ // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -10600,9 +11719,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
- SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
- Ld->getBasePtr(), Ld->getSrcValue(),
- Ld->getSrcValueOffset(), Ld->isVolatile(),
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
@@ -10611,7 +11729,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Ops.size());
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
}
@@ -10622,11 +11740,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(4, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
- Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
MinAlign(Ld->getAlignment(), 4));
@@ -10643,12 +11761,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
- St->getSrcValue(), St->getSrcValueOffset(),
+ St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
- St->getSrcValue(),
- St->getSrcValueOffset() + 4,
+ St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
@@ -10706,13 +11823,13 @@ static SDValue PerformBTCombine(SDNode *N,
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
- if (Op.getOpcode() == ISD::BIT_CONVERT)
+ if (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
EVT VT = N->getValueType(0), OpVT = Op.getValueType();
if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
VT.getVectorElementType().getSizeInBits() ==
OpVT.getVectorElementType().getSizeInBits()) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
return SDValue();
}
@@ -10743,19 +11860,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned X86CC = N->getConstantOperandVal(0);
+ SDValue EFLAG = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
+ // a zext and produces an all-ones bit which is more useful than 0/1 in some
+ // cases.
+ if (X86CC == X86::COND_B)
+ return DAG.getNode(ISD::AND, DL, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAG),
+ DAG.getConstant(1, MVT::i8));
+
+ return SDValue();
+}
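This combine rewrites "setb" into "sbb reg,reg" plus an AND with 1: subtracting a register from itself with borrow leaves 0 or all-ones depending on the carry flag, which is sometimes more useful than the 0/1 byte and still yields 0/1 after the mask. A scalar model of the flag behaviour (not real flag access):

#include <cstdint>

static inline uint64_t setcc_carry(bool carry) { return carry ? ~0ull : 0; } // sbb r,r
static inline uint64_t setb(bool carry)        { return setcc_carry(carry) & 1; }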
+
+// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
+static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
+ X86TargetLowering::DAGCombinerInfo &DCI) {
+ // If the LHS and RHS of the ADC node are zero, then it can't overflow and
+ // the result is either zero or one (depending on the input carry bit).
+ // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
+ if (X86::isZeroNode(N->getOperand(0)) &&
+ X86::isZeroNode(N->getOperand(1)) &&
+ // We don't have a good way to replace an EFLAGS use, so only do this when
+ // dead right now.
+ SDValue(N, 1).use_empty()) {
+ DebugLoc DL = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
+ SDValue Res1 = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getConstant(X86::COND_B,MVT::i8),
+ N->getOperand(2)),
+ DAG.getConstant(1, VT));
+ return DCI.CombineTo(N, Res1, CarryOut);
+ }
+
+ return SDValue();
+}
+
+// fold (add Y, (sete X, 0)) -> adc 0, Y
+// (add Y, (setne X, 0)) -> sbb -1, Y
+// (sub (sete X, 0), Y) -> sbb 0, Y
+// (sub (setne X, 0), Y) -> adc -1, Y
+static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // Look through ZExts.
+ SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
+ if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC = Ext.getOperand(0);
+ if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ SDValue Cmp = SetCC.getOperand(1);
+ if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
+ !X86::isZeroNode(Cmp.getOperand(1)) ||
+ !Cmp.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
+ DAG.getConstant(1, CmpOp0.getValueType()));
+
+ SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ if (CC == X86::COND_NE)
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
+ DL, OtherVal.getValueType(), OtherVal,
+ DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
+}
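The folds listed above hinge on one fact: after "cmp X, 1" the carry flag is set exactly when X == 0 (unsigned X < 1), so adding (sete X, 0) is the same as adding the carry. A scalar check of the first identity, with hypothetical helper names used only to spell out the arithmetic:

#include <cstdint>

static inline uint64_t add_sete(uint64_t y, uint64_t x) {
  return y + (x == 0 ? 1 : 0);      // add Y, (sete X, 0)
}

static inline uint64_t adc_zero(uint64_t y, uint64_t x) {
  uint64_t carry = (x < 1) ? 1 : 0; // carry out of "cmp x, 1"
  return y + 0 + carry;             // adc 0, Y
}
// add_sete(y, x) == adc_zero(y, x) for all x, y.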
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::ADD:
+ case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -10764,8 +11968,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
+ case X86ISD::PALIGN:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHDQ:
@@ -10785,7 +11991,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI);
}
return SDValue();
@@ -10892,44 +12098,14 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-static bool LowerToBSwap(CallInst *CI) {
- // FIXME: this should verify that we are targetting a 486 or better. If not,
- // we will turn this bswap into something that will be lowered to logical ops
- // instead of emitting the bswap asm. For now, we don't support 486 or lower
- // so don't worry about this.
-
- // Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
- !CI->getType()->isIntegerTy())
- return false;
-
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
-
- // Okay, we can do this xform, do so now.
- const Type *Tys[] = { Ty };
- Module *M = CI->getParent()->getParent()->getParent();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-
- Value *Op = CI->getArgOperand(0);
- Op = CallInst::Create(Int, Op, CI->getName(), CI);
-
- CI->replaceAllUsesWith(Op);
- CI->eraseFromParent();
- return true;
-}
-
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
std::string AsmStr = IA->getAsmString();
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
- SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+ SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
@@ -10938,6 +12114,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
// bswap $0
if (AsmPieces.size() == 2 &&
(AsmPieces[0] == "bswap" ||
@@ -10947,7 +12127,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[1] == "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
@@ -10957,35 +12140,76 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[2] == "${0:w}" &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
AsmPieces.clear();
- const std::string &Constraints = IA->getConstraintString();
- SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
AsmPieces[3] == "~{fpsr}") {
- return LowerToBSwap(CI);
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
case 3:
- if (CI->getType()->isIntegerTy(64) &&
- Constraints.size() >= 2 &&
- Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
- Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
- // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ if (CI->getType()->isIntegerTy(32) &&
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ SplitString(AsmPieces[0], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
Words.clear();
- SplitString(AsmPieces[1], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ SplitString(AsmPieces[1], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
+ Words[2] == "$0") {
Words.clear();
SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
- Words[2] == "%edx") {
- return LowerToBSwap(CI);
+ if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
+ Words[2] == "${0:w}") {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
+ }
+ }
+ }
+ }
+
+ if (CI->getType()->isIntegerTy(64)) {
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ if (Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ SmallVector<StringRef, 4> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+ return IntrinsicLowering::LowerToByteSwap(CI);
+ }
}
}
}
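// Editorial note, not part of the patch: an illustrative example of the kind of
// inline asm this routine now rewrites to the llvm.bswap intrinsic, assuming a
// GCC-style extended-asm caller (the "=r"/"0" constraints match the "=r,0"
// pattern checked above):
//
//   unsigned bswap32(unsigned x) {
//     __asm__("bswap %0" : "=r"(x) : "0"(x));   // recognized and lowered to llvm.bswap.i32
//     return x;
//   }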
@@ -11003,18 +12227,32 @@ X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'A':
- return C_Register;
- case 'f':
- case 'r':
case 'R':
- case 'l':
case 'q':
case 'Q':
- case 'x':
+ case 'f':
+ case 't':
+ case 'u':
case 'y':
+ case 'x':
case 'Y':
return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
case 'e':
case 'Z':
return C_Other;
@@ -11025,6 +12263,110 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ weight = CW_Register;
+ break;
+ case 'I':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
+ if (C->getZExtValue() <= 31)
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
+ }
+ break;
+ }
+ return weight;
+}
+
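// Editorial note, not part of the patch: a rough sketch of how the weights
// above are intended to be read, shown as hypothetical outcomes rather than a
// real call site. For the 'I' constraint (immediates 0..31):
//
//   // info.CallOperandVal == ConstantInt 7   ->  CW_Constant   (fits 0..31)
//   // info.CallOperandVal == ConstantInt 99  ->  default weight (out of range)
//   // info.CallOperandVal == NULL            ->  CW_Default    (no value to match)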
/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
@@ -11033,9 +12375,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasSSE2())
+ if (Subtarget->hasXMMInt())
return "Y";
- if (Subtarget->hasSSE1())
+ if (Subtarget->hasXMM())
return "x";
}
@@ -11265,10 +12607,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasSSE2()) break;
+ if (!Subtarget->hasXMMInt()) break;
// FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed
- if (!Subtarget->hasSSE1()) break;
+ if (!Subtarget->hasXMM()) break;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index d2d9b28..419da37 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -57,35 +57,6 @@ namespace llvm {
/// corresponds to X86::PSRLDQ.
FSRL,
- /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
- /// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has three inputs (token chain, address,
- /// and source type) and two outputs (FP value and token chain). FILD_FLAG
- /// also produces a flag).
- FILD,
- FILD_FLAG,
-
- /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
- /// integer destination in memory and a FP reg source. This corresponds
- /// to the X86::FIST*m instructions and the rounding mode change stuff. It
- /// has two inputs (token chain and address) and two outputs (int value
- /// and token chain).
- FP_TO_INT16_IN_MEM,
- FP_TO_INT32_IN_MEM,
- FP_TO_INT64_IN_MEM,
-
- /// FLD - This instruction implements an extending load to FP stack slots.
- /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, ptr to load from, and a ValueType node indicating the type
- /// to load to.
- FLD,
-
- /// FST - This instruction implements a truncating store to FP stack
- /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and a ValueType to store it
- /// as.
- FST,
-
/// CALL - These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
@@ -105,7 +76,7 @@ namespace llvm {
///
CALL,
- /// RDTSC_DAG - This operation implements the lowering for
+ /// RDTSC_DAG - This operation implements the lowering for
/// readcyclecounter
RDTSC_DAG,
@@ -115,13 +86,13 @@ namespace llvm {
/// X86 bit-test instructions.
BT,
- /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
- /// operand produced by a CMP instruction.
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
+ /// operand, usually produced by a CMP instruction.
SETCC,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
- SETCC_CARRY,
+ SETCC_CARRY, // R = carry_bit ? ~0 : 0
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
@@ -157,11 +128,15 @@ namespace llvm {
/// relative displacements.
WrapperRIP,
- /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
- /// Can be used to move a vector value from a MMX register to a XMM
- /// register.
+ /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word
+ /// of an XMM vector, with the high word zero filled.
MOVQ2DQ,
+ /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// to an MMX vector. If you think this is too close to the previous
+ /// mnemonic, so do I; blame Intel.
+ MOVDQ2Q,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
@@ -184,7 +159,16 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
-
+
+ /// PANDN - and with not'd value.
+ PANDN,
+
+ /// PSIGNB/W/D - Copy integer sign.
+ PSIGNB, PSIGNW, PSIGND,
+
+ /// PBLENDVB - Variable blend
+ PBLENDVB,
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -196,17 +180,14 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
-
+
// TLSCALL - Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
- // SegmentBaseAddress - The address segment:0
- SegmentBaseAddress,
-
// EH_RETURN - Exception Handling helpers.
EH_RETURN,
-
+
/// TC_RETURN - Tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -214,37 +195,29 @@ namespace llvm {
/// operand #3 optional in flag
TC_RETURN,
- // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
- LCMPXCHG_DAG,
- LCMPXCHG8_DAG,
-
- // FNSTCW16m - Store FP control world into i16 memory.
- FNSTCW16m,
-
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,
- // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
- VZEXT_LOAD,
-
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
// CMPPD, CMPPS - Vector double/float comparison.
// CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS,
-
+
// PCMP* - Vector integer comparisons.
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
- // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
- ADD, SUB, SMUL, UMUL,
+ // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
+
+ UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
-
+
// PTEST - Vector bitwise comparisons
PTEST,
@@ -291,11 +264,17 @@ namespace llvm {
// with control flow.
VASTART_SAVE_XMM_REGS,
- // MINGW_ALLOCA - MingW's __alloca call to do stack probing.
- MINGW_ALLOCA,
+ // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
+ WIN_ALLOCA,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
// Atomic 64-bit binary operations.
ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
ATOMSUB64_DAG,
@@ -304,12 +283,49 @@ namespace llvm {
ATOMAND64_DAG,
ATOMNAND64_DAG,
ATOMSWAP64_DAG,
-
- // Memory barrier
- MEMBARRIER,
- MFENCE,
- SFENCE,
- LFENCE
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+ /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// VAARG_64 - This instruction grabs the address of the next argument
+ /// from a va_list. (reads and modifies the va_list in memory)
+ VAARG_64
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -392,6 +408,16 @@ namespace llvm {
/// specifies a shuffle of elements that is suitable for input to PALIGNR.
bool isPALIGNRMask(ShuffleVectorSDNode *N);
+ /// isVEXTRACTF128Index - Return true if the specified
+ /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+ /// suitable for input to VEXTRACTF128.
+ bool isVEXTRACTF128Index(SDNode *N);
+
+ /// isVINSERTF128Index - Return true if the specified
+ /// INSERT_SUBVECTOR operand specifies a subvector insert that is
+ /// suitable for input to VINSERTF128.
+ bool isVINSERTF128Index(SDNode *N);
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
@@ -409,6 +435,16 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned getShufflePALIGNRImmediate(SDNode *N);
+ /// getExtractVEXTRACTF128Immediate - Return the appropriate
+ /// immediate to extract the specified EXTRACT_SUBVECTOR index
+ /// with VEXTRACTF128 instructions.
+ unsigned getExtractVEXTRACTF128Immediate(SDNode *N);
+
+ /// getInsertVINSERTF128Immediate - Return the appropriate
+ /// immediate to insert at the specified INSERT_SUBVECTOR index
+ /// with VINSERTF128 instructions.
+ unsigned getInsertVINSERTF128Immediate(SDNode *N);
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool isZeroNode(SDValue Elt);
@@ -425,16 +461,13 @@ namespace llvm {
public:
explicit X86TargetLowering(X86TargetMachine &TM);
- /// getPICBaseSymbol - Return the X86-32 PIC base.
- MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const;
-
virtual unsigned getJumpTableEncoding() const;
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid,
MCContext &Ctx) const;
-
+
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
virtual SDValue getPICJumpTableRelocBase(SDValue Table,
@@ -442,7 +475,7 @@ namespace llvm {
virtual const MCExpr *
getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI, MCContext &Ctx) const;
-
+
/// getStackPtrReg - Return the stack pointer register we are using: either
/// ESP or RSP.
unsigned getStackPtrReg() const { return X86StackPtr; }
@@ -486,7 +519,7 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
-
+
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
/// isTypeDesirableForOp - Return true if the target has native support for
@@ -505,7 +538,7 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
-
+
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -513,26 +546,36 @@ namespace llvm {
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
+ // operation that are sign bits.
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const;
+
virtual bool
isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
-
+
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
virtual bool ExpandInlineAsm(CallInst *CI) const;
-
+
ConstraintType getConstraintType(const std::string &Constraint) const;
-
- std::vector<unsigned>
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ virtual ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -546,15 +589,15 @@ namespace llvm {
char ConstraintLetter,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
-
+
/// getRegForInlineAsmConstraint - Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
/// error, this returns a register number of 0.
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
-
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
@@ -609,7 +652,7 @@ namespace llvm {
// shrink long double fp constant since fldt is very slow.
return !X86ScalarSSEf64 || VT == MVT::f80;
}
-
+
const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -650,8 +693,8 @@ namespace llvm {
/// X86StackPtr - X86 physical register used as stack ptr.
unsigned X86StackPtr;
-
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
@@ -702,7 +745,6 @@ namespace llvm {
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) const;
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const;
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG &DAG) const;
@@ -719,6 +761,8 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
@@ -729,7 +773,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
- SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -794,6 +838,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ virtual bool isUsedByReturnOnly(SDNode *N) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -810,6 +856,13 @@ namespace llvm {
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
unsigned argNum, bool inMem) const;
+ /// Utility functions to emit monitor and mwait instructions. These
+ /// need to make sure that the arguments to the intrinsic are in the
+ /// correct registers.
+ MachineBasicBlock *EmitMonitor(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
+
/// Utility function to emit atomic bitwise operations (and, or, xor).
/// It takes the bitwise instruction to expand, the associated machine basic
/// block, and the associated X86 opcodes for reg/reg and reg/imm.
@@ -833,7 +886,7 @@ namespace llvm {
unsigned immOpcL,
unsigned immOpcH,
bool invSrc = false) const;
-
+
/// Utility function to emit atomic min and max. It takes the min/max
/// instruction to expand, the associated basic block, and the associated
/// cmov opcode for moving the min or max value.
@@ -841,6 +894,11 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned cmovOpc) const;
+ // Utility function to emit the low-level va_arg code for X86-64.
+ MachineBasicBlock *EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
/// Utility function to emit the xmm reg save portion of va_start.
MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *BInstr,
@@ -849,12 +907,15 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+ MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
-
+
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 0000000..45d1c6b
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,77 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: We don't support any intrinsics for these instructions yet.
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
+ : I<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
+ TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+}
+
+
+let Constraints = "$src1 = $dst" in {
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+ multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
+ }
+}
+
+defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
+defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+ "prefetch $addr", []>;
+
+// FIXME: Disassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in {
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+ "prefetchw $addr", []>;
+}
+
+// "3DNowA" instructions
+defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
+defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
+defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
+defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
+defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
diff --git a/contrib/llvm/lib/Target/X86/X86Instr64bit.td b/contrib/llvm/lib/Target/X86/X86Instr64bit.td
deleted file mode 100644
index 0884b61..0000000
--- a/contrib/llvm/lib/Target/X86/X86Instr64bit.td
+++ /dev/null
@@ -1,2250 +0,0 @@
-//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86-64 instruction set, defining the instructions,
-// and properties of the instructions which are needed for code generation,
-// machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i32AsmOperand;
-}
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
- let PrintMethod = "print_pcrel_imm";
- let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExti64i8AsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
- let PrintMethod = "printi32mem";
- let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-
-// Special i64mem for addresses of load folding tail calls. These are not
-// allowed to use callee-saved registers since they must be scheduled
-// after callee-saved register are popped.
-def i64mem_TC : Operand<i64> {
- let PrintMethod = "printi64mem";
- let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// Complex Pattern Definitions.
-//
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
- [add, sub, mul, X86mul_imm, shl, or, frameindex,
- X86WrapperRIP], []>;
-
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
- [tglobaltlsaddr], []>;
-
-//===----------------------------------------------------------------------===//
-// Pattern fragments.
-//
-
-def i64immSExt8 : PatLeaf<(i64 immSext8)>;
-
-def GetLo32XForm : SDNodeXForm<imm, [{
- // Transformation function: get the low 32 bits.
- return getI32Imm((unsigned)N->getZExtValue());
-}]>;
-
-def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-
-
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // unsignedsign extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
-
-def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
-def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
-def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-//===----------------------------------------------------------------------===//
-// Instruction list...
-//
-
-// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [RSP, EFLAGS], Uses = [RSP] in {
-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start timm:$amt)]>,
- Requires<[In64BitMode]>;
-def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In64BitMode]>;
-}
-
-// Interrupt Instructions
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>;
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
-
- // NOTE: this pattern doesn't match "X86call imm", because we do not know
- // that the offset between an arbitrary immediate and the call will fit in
- // the 32-bit pcrel field that we have.
- def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[NotWin64]>;
- def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
- Requires<[NotWin64]>;
-
- def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
- "lcall{q}\t{*}$dst", []>;
- }
-
- // FIXME: We need to teach codegen about single list of call-clobbered
- // registers.
-let isCall = 1, isCodeGenOnly = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP] in {
- def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call\t$dst", []>,
- Requires<[IsWin64]>;
- def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call\t{*}$dst",
- [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
- def WINCALL64m : I<0xFF, MRM2m, (outs),
- (ins i64mem:$dst, variable_ops), "call\t{*}$dst",
- [(X86call (loadi64 addr:$dst))]>,
- Requires<[IsWin64]>;
- }
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
- def TCRETURNdi64 : I<0, Pseudo, (outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
- variable_ops),
- "#TC_RETURN $dst $offset", []>;
- let mayLoad = 1 in
- def TCRETURNmi64 : I<0, Pseudo, (outs),
- (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
-
- def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL", []>;
- def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
-
- let mayLoad = 1 in
- def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
-}
-
-// Branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp{q}\t$dst", []>;
- def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
- def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
- def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
- "ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions...
-//
-
-def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
-def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
-def POP64r : I<0x58, AddRegFrm,
- (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
-def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
-}
-let mayStore = 1 in {
-def PUSH64r : I<0x50, AddRegFrm,
- (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
-}
-}
-
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
- "push{q}\t$imm", []>;
-def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{q}\t$imm", []>;
-def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
- "push{q}\t$imm", []>;
-}
-
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
-def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
- Requires<[In64BitMode]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
-def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
- Requires<[In64BitMode]>;
-
-def LEA64_32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins lea64_32mem:$src),
- "lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
-
-let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lea{q}\t{$src|$dst}, {$dst|$src}",
- [(set GR64:$dst, lea64addr:$src)]>;
-
-let Constraints = "$src = $dst" in
-def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
- "bswap{q}\t$dst",
- [(set GR64:$dst, (bswap GR64:$src))]>, TB;
-
-// Bit scan instructions.
-let Defs = [EFLAGS] in {
-def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
-def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
-
-def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
-def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
-} // Defs = [EFLAGS]
-
-// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
-
-let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
-def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
-def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
-
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
-
-// Fast system-call instructions
-def SYSEXIT64 : RI<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-// Move Instructions...
-//
-
-let neverHasSideEffects = 1 in
-def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
- "movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, imm:$src)]>;
-def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, i64immSExt32:$src)]>;
-}
-
-// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
-// movabsq.
-let isAsmParserOnly = 1 in {
-def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let isCodeGenOnly = 1 in {
-def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (load addr:$src))]>;
-
-def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store GR64:$src, addr:$dst)]>;
-def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32:$src, addr:$dst)]>;
-
-/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- []>;
-
-let mayStore = 1 in
-def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- []>;
-}
-
-// FIXME: These definitions are utterly broken
-// Just leave them commented out for now because they're useless outside
-// of the large code model, and most compilers won't generate the instructions
-// in question.
-/*
-def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
- "mov{q}\t{$src, %rax|%rax, $src}", []>;
-def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
- "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
-*/
-
-// Moves to and from segment registers
-def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-
-// Moves to and from debug registers
-def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Sign/Zero extenders
-
-// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
-// operand, which makes it a rare instruction with an 8-bit register
-// operand that can never access an h register. If support for h registers
-// were generalized, this would require a special register class.
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))]>, TB;
-def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
-def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))]>, TB;
-def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
-def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
- "movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
-
-// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Use movzbl instead of movzbq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
-def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
-// Use movzwl instead of movzwq when the destination is a register; it's
-// equivalent due to implicit zero-extending, and it has a smaller encoding.
-def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
-def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
-
-// There's no movzlq instruction, but movl can be used for this purpose, using
-// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
-// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
-// zero-extension, however this isn't possible when the 32-bit value is
-// defined by a truncate or is copied from something where the high bits aren't
-// necessarily all zero. In such cases, we fall back to these explicit zext
-// instructions.
-def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))]>;
-def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. And x86's cmov doesn't do anything if the
-// condition is false. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-def def32 : PatLeaf<(i32 GR32:$src), [{
- return N->getOpcode() != ISD::TRUNCATE &&
- N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg &&
- N->getOpcode() != X86ISD::CMOV;
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-let neverHasSideEffects = 1 in {
- let Defs = [RAX], Uses = [EAX] in
- def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>; // RAX = signext(EAX)
-
- let Defs = [RAX,RDX], Uses = [RAX] in
- def CQO : RI<0x99, RawFrm, (outs), (ins),
- "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions...
-//
-
-let Defs = [EFLAGS] in {
-
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
- "add{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isConvertibleToThreeAddress = 1 in {
-let isCommutable = 1 in
-// Register-Register Addition
-def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Integer Addition
-def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
-def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isConvertibleToThreeAddress
-
-// Register-Memory Addition
-def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86add_flag GR64:$src1, (load addr:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-// Memory-Register Addition
-def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
- "add{q}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
- "adc{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
-
-def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
-def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt8:$src2),
- addr:$dst)]>;
-def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt32:$src2),
- addr:$dst)]>;
-} // Uses = [EFLAGS]
-
-let Constraints = "$src1 = $dst" in {
-// Register-Register Subtraction
-def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
-def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
- "sub{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Memory-Register Subtraction
-def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
-// Memory-Integer Subtraction
-def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i64immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "sub{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i64immSExt32:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
-let Uses = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
-
-def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
-def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
- "sbb{q}\t{$src, %rax|%rax, $src}", []>;
-
-def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
-def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
-def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
- "sbb{q}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-// Unsigned multiplication
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
-def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
-
-// Signed multiplication
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64
-let mayLoad = 1 in
-def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
-}
-
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-// Register-Register Signed Integer Multiplication
-def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
-
-// Register-Memory Signed Integer Multiplication
-def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Surprisingly enough, these are not two-address instructions!
-
-// Register-Integer Signed Integer Multiplication
-def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
-def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
-
-// Memory-Integer Signed Integer Multiplication
-def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
- (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i64immSExt8:$src2))]>;
-def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
- (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Unsigned division / remainder
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
-// RDX:RAX/r64 = RAX,RDX
-def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>;
-// Signed division / remainder
-// RDX:RAX/r64 = RAX,RDX
-def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>;
-let mayLoad = 1 in {
-// RDX:RAX/[mem64] = RAX,RDX
-def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>;
-// RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>;
-}
-}
-
-// Unary instructions
-let Defs = [EFLAGS], CodeSize = 2 in {
-let Constraints = "$src = $dst" in
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
- [(set GR64:$dst, (ineg GR64:$src)),
- (implicit EFLAGS)]>;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
- [(store (ineg (loadi64 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
-def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
- [(store (add (loadi64 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
-def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
- [(store (add (loadi64 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
-
-// In 64-bit mode, single byte INC and DEC cannot be encoded.
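-// (Their single-byte 0x40-0x4F opcodes are reused as REX prefixes in 64-bit
-// mode, so the two-byte FF /0 and FF /1 forms below are used instead.)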
-let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
-// Can transform into LEA.
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
- OpSize, Requires<[In64BitMode]>;
-def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
- Requires<[In64BitMode]>;
-def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
- OpSize, Requires<[In64BitMode]>;
-def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
- Requires<[In64BitMode]>;
-} // Constraints = "$src = $dst", isConvertibleToThreeAddress
-
-// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
-// how to unfold them.
-def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
-def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
-def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-} // Defs = [EFLAGS], CodeSize
-
-
-let Defs = [EFLAGS] in {
-// Shift instructions
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src1, CL))]>;
-let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
-def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "shl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
-def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t$dst", []>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
- "shl{q}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t$dst",
- [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src1, CL))]>;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
- "shr{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
-def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t$dst",
- [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
- "shr{q}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t$dst",
- [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src1, CL))]>;
-def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "sar{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
-def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t$dst",
- [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
-def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
- "sar{q}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t$dst",
- [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Rotate instructions
-
-let Constraints = "$src = $dst" in {
-def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
- "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src),
- "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = "$src = $dst"
-
-def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{1, $dst|$dst, 1}", []>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{1, $dst|$dst, 1}", []>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
-def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
-def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
-def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
- "rol{q}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
-def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
- "ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
-def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
-def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
- "ror{q}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-
-// Double shift instructions (generalizations of rotate)
-let Constraints = "$src1 = $dst" in {
-let Uses = [CL] in {
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
- TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
- TB;
-}
-
-let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
-def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
- (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
- (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
- TB;
-} // isCommutable
-} // Constraints = "$src1 = $dst"
-
-let Uses = [CL] in {
-def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
-def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
-}
-def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Logical Instructions...
-//
-
-let Constraints = "$src = $dst", AddedComplexity = 15 in
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
- [(set GR64:$dst, (not GR64:$src))]>;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
- [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
-
-let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
- "and{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def AND64rr : RI<0x21, MRMDestReg,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def AND64rm : RI<0x23, MRMSrcMem,
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, (load addr:$src2)))]>;
-def AND64ri8 : RIi8<0x83, MRM4r,
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
-def AND64ri32 : RIi32<0x81, MRM4r,
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def AND64mr : RI<0x21, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def AND64mi8 : RIi8<0x83, MRM4m,
- (outs), (ins i64mem:$dst, i64i8imm :$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def AND64mi32 : RIi32<0x81, MRM4m,
- (outs), (ins i64mem:$dst, i64i32imm:$src),
- "and{q}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, (load addr:$src2)))]>;
-def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
-def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i64i32imm:$src2),
- "or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "or{q}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
- "or{q}\t{$src, %rax|%rax, $src}", []>;
-
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in
-def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, GR64:$src2))]>;
-let isCodeGenOnly = 1 in {
-def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}", []>;
-}
-def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
-def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst),
- (ins GR64:$src1, i64i8imm:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
-def XOR64ri32 : RIi32<0x81, MRM6r,
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // Constraints = "$src1 = $dst"
-
-def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "xor{q}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
- "xor{q}\t{$src, %rax|%rax, $src}", []>;
-
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Comparison Instructions...
-//
-
-// Integer comparison
-let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
- "test{q}\t{$src, %rax|%rax, $src}", []>;
-let isCommutable = 1 in
-def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>;
-def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)),
- 0))]>;
-def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
- (ins GR64:$src1, i64i32imm:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2),
- 0))]>;
-def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
- (ins i64mem:$src1, i64i32imm:$src2),
- "test{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1),
- i64immSExt32:$src2), 0))]>;
-
-
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
- "cmp{q}\t{$src, %rax|%rax, $src}", []>;
-def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>;
-
-// These are alternate spellings for use by the disassembler; we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}", []>;
-}
-
-def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>;
-def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>;
-def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>;
-def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>;
-def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
- i64immSExt8:$src2))]>;
-def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
- (ins i64mem:$src1, i64i32imm:$src2),
- "cmp{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi64 addr:$src1),
- i64immSExt32:$src2))]>;
-} // Defs = [EFLAGS]
-
-// Bit tests.
-// TODO: BTC, BTR, and BTS
-let Defs = [EFLAGS] in {
-def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
-
-// Unlike with the register+register form, the memory+register form of the
-// bt instruction does not ignore the high bits of the index. From ISel's
-// perspective, this is pretty bizarre. Disable these instructions for now.
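-// (With a register operand, bt masks the bit index to the operand width, but
-// with a memory operand the index is a signed bit offset that can reach
-// outside the addressed quadword.)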
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
-// [(X86bt (loadi64 addr:$src1), GR64:$src2),
-// (implicit EFLAGS)]
- []
- >, TB;
-
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt (loadi64 addr:$src1),
- i64immSExt8:$src2))]>, TB;
-
-def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-
-def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // Defs = [EFLAGS]
-
-// Conditional moves
-let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
-def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovae{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmove{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovne{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmova{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovge{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovle{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovg{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovs{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovns{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovo{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "cmovno{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
- X86_COND_NO, EFLAGS))]>, TB;
-} // isCommutable = 1
-
-def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovb{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_B, EFLAGS))]>, TB;
-def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovae{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_AE, EFLAGS))]>, TB;
-def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmove{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_E, EFLAGS))]>, TB;
-def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovne{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmova{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovge{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovle{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovg{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovs{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovns{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovo{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
- (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- "cmovno{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- X86_COND_NO, EFLAGS))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Use sbb to materialize carry flag into a GPR.
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
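-// ("sbb %reg, %reg" computes reg - reg - CF, i.e. 0 when the carry flag is
-// clear and all-ones when it is set.)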
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
-def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-
-def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C64r)>;
-
-//===----------------------------------------------------------------------===//
-// Descriptor-table support instructions
-
-// LLDT is not interpreted specially in 64-bit mode because there is no sign
-// extension.
-def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
-def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{q}\t$dst", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
-// smaller encoding, but doing so at isel time interferes with rematerialization
-// in the current register allocator. For now, this is rewritten when the
-// instruction is lowered to an MCInst.
-// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
-// when we have a better way to specify isel priority.
-let Defs = [EFLAGS],
- AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
-
-// Materialize i64 constant where top 32-bits are zero. This could theoretically
-// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
-// that would make it more difficult to rematerialize.
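-// (Note: with no REX.W this is in effect a plain 32-bit mov, relying on the
-// implicit zero-extension of 32-bit register writes to clear the upper half.)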
-let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//===----------------------------------------------------------------------===//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. RSP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- ".byte\t0x66; "
- "leaq\t$sym(%rip), %rdi; "
- ".word\t0x6666; "
- "rex64; "
- "call\t__tls_get_addr@PLT",
- [(X86tlsaddr tls64addr:$sym)]>,
- Requires<[In64BitMode]>;
-
-// Darwin TLS Support
-// For x86_64, the address of the thunk is passed in %rdi; on return,
-// the address of the variable is in %rax. All other registers are preserved.
-let Defs = [RAX],
- Uses = [RDI],
- usesCustomInserter = 1 in
-def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
- "# TLSCall_64",
- [(X86TLSCall addr:$sym)]>,
- Requires<[In64BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "movq\t%gs:$src, $dst",
- [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "movq\t%fs:$src, $dst",
- [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// Atomic Instructions
-//===----------------------------------------------------------------------===//
-
-// TODO: Get this to fold the constant into the instruction.
-let hasSideEffects = 1, Defs = [ESP] in
-def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
- "lock\n\t"
- "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
- [(X86MemBarrierNoSSE GR64:$zero)]>,
- Requires<[In64BitMode]>, LOCK;
-
-let Defs = [RAX, EFLAGS], Uses = [RAX] in {
-def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
- "lock\n\t"
- "cmpxchgq\t$swap,$ptr",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
-}
-
-let Constraints = "$val = $dst" in {
-let Defs = [EFLAGS] in
-def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
- "lock\n\t"
- "xadd\t$val, $ptr",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
- TB, LOCK;
-
-def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val,i64mem:$ptr),
- "xchg{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-
-def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
- "xchg{q}\t{$val, $src|$src, $val}", []>;
-}
-
-def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-let mayLoad = 1, mayStore = 1 in
-def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
-def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>, TB;
-
-def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-
-// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
- (ins i64mem:$dst, i64i32imm :$src2),
- "lock\n\t"
- "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
- (ins i64mem:$dst, i64i8imm :$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
- (ins i64mem:$dst, i64i32imm:$src2),
- "lock\n\t"
- "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
-def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
- "lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
-}
-// Atomic and, or, xor, nand, min, and max pseudo instructions
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
-def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
-def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
-def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMXOR64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
-def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMNAND64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
-def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
- "#ATOMMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
-def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMIN64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
-def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
- "#ATOMUMAX64 PSEUDO!",
- [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
-}
-
-// Segmentation support instructions
-
-// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
-
-def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
- "push{q}\t%fs", []>, TB;
-def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
- "push{q}\t%gs", []>, TB;
-
-def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
- "pop{q}\t%fs", []>, TB;
-def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
- "pop{q}\t%gs", []>, TB;
-
-def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
- "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Specialized register support
-
-// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
- "smsw{q}\t$dst", []>, TB;
-
-// String manipulation instructions
-
-def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
-// not in the small code model, should use 'movabs'. FIXME: This is really a
-// hack; the 'movabs' predicate should handle this sort of thing.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
-
-// In static codegen with the small code model, we can get the address of a
-// label into a register with 'movl'. FIXME: This is a hack; the 'imm'
-// predicate of MOV64ri64i32 should accept these.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
-
-// In the kernel code model, we can get the address of a label into a register
-// with 'movq'. FIXME: This is a hack; the 'imm' predicate of MOV64ri32 should
-// accept these.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
-
-// With the small code model and -static mode, it is safe to store global
-// addresses directly as immediates. FIXME: This is really a hack; the 'imm'
-// predicate for MOV64mi32 should handle this sort of thing.
-def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tconstpool:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tjumptable:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, texternalsym:$src)>,
- Requires<[NearData, IsStatic]>;
-def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV64mi32 addr:$dst, tblockaddress:$src)>,
- Requires<[NearData, IsStatic]>;
-
-// Calls
-// Direct PC relative function call for small code model. 32-bit displacement
-// sign extended to 64-bit.
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
-
-// tailcall stuff
-def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
- (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
- (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
- Requires<[In64BitMode]>;
-
-// tls has some funny stuff here...
-// This corresponds to movabs $foo@tpoff, %rax
-def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
- (MOV64ri tglobaltlsaddr :$dst)>;
-// This corresponds to add $foo@tpoff, %rax
-def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
- (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
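-// (e.g. "testq %rcx, %rcx" is 3 bytes, while "cmpq $0, %rcx" is at least 4.)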
-def : Pat<(X86cmp GR64:$src1, 0),
- (TEST64rr GR64:$src1, GR64:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
- (CMOVAE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
- (CMOVB64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
- (CMOVNE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
- (CMOVE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
- (CMOVA64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
- (CMOVBE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
- (CMOVGE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
- (CMOVL64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
- (CMOVG64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
- (CMOVLE64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
- (CMOVNP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
- (CMOVP64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
- (CMOVNS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
- (CMOVS64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
- (CMOVNO64rm GR64:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
- (CMOVO64rm GR64:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-
-// extload
-// When extloading from 16-bit and smaller memory locations into 64-bit
-// registers, use zero-extending loads so that the entire 64-bit register is
-// defined, avoiding partial-register updates.
-def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
-def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
-// For other extloads, use subregs, since the high contents of the register are
-// defined after an extload.
-def : Pat<(extloadi64i32 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- sub_32bit)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
-def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
-def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
-
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
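-// (e.g. "addq $128, %rcx" needs the imm32 form, REX.W 81 /0 id, 7 bytes,
-// while "subq $-128, %rcx" fits the imm8 form, REX.W 83 /5 ib, 4 bytes.)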
-def : Pat<(add GR64:$src1, 128),
- (SUB64ri8 GR64:$src1, -128)>;
-def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
- (SUB64mi8 addr:$dst, -128)>;
-
-// The same trick applies for 32-bit immediate fields in 64-bit
-// instructions.
-def : Pat<(add GR64:$src1, 0x0000000080000000),
- (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
-def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
- (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
-
-// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
-// has an immediate with at least 32 bits of leading zeros, to avoid needing to
-// materialize that immediate in a register first.
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
- (SUBREG_TO_REG
- (i64 0),
- (AND32ri
- (EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo32XForm imm:$imm))),
- sub_32bit)>;
-
-// r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-// r & (2^16-1) ==> movz
-def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
- Requires<[In64BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
- Requires<[In64BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
- Requires<[In64BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
- Requires<[In64BitMode]>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
-def : Pat<(i16 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
- Requires<[In64BitMode]>;
-
-// h-register tricks.
-// For now, be conservative on x86-64 and use an h-register extract only if the
-// value is immediately zero-extended or stored, which are somewhat common
-// cases. This uses a bunch of code to prevent a register requiring a REX prefix
-// from being allocated in the same instruction as the h register, as there's
-// currently no way to describe this requirement to the register allocator.
-
-// h-register extract and zero-extend.
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
- (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
- (EXTRACT_SUBREG
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_16bit)>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
- (SUBREG_TO_REG
- (i64 0),
- (MOVZX32_NOREXrr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_32bit)>;
-
-// h-register extract and store.
-def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- sub_8bit_hi))>;
-def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
- (MOV8mr_NOREX
- addr:$dst,
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In64BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-
-// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
- (SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SHL64mCL addr:$dst)>;
-
-def : Pat<(srl GR64:$src1, (and CL, 63)),
- (SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SHR64mCL addr:$dst)>;
-
-def : Pat<(sra GR64:$src1, (and CL, 63)),
- (SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
- (SAR64mCL addr:$dst)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try these before selecting an OR
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or_is_add GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-} // AddedComplexity
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
- (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
-
-// addition
-def : Pat<(add GR64:$src1, GR64:$src2),
- (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-
-// subtraction
-def : Pat<(sub GR64:$src1, GR64:$src2),
- (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Multiply
-def : Pat<(mul GR64:$src1, GR64:$src2),
- (IMUL64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
- (IMUL64rm GR64:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
- (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
- (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
- (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
- (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-
-// inc/dec
-def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
-// or
-def : Pat<(or GR64:$src1, GR64:$src2),
- (OR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
- (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
- (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
- (OR64rm GR64:$src1, addr:$src2)>;
-
-// xor
-def : Pat<(xor GR64:$src1, GR64:$src2),
- (XOR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
- (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
- (XOR64rm GR64:$src1, addr:$src2)>;
-
-// and
-def : Pat<(and GR64:$src1, GR64:$src2),
- (AND64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
- (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
- (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
- (AND64rm GR64:$src1, addr:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// X86-64 SSE Instructions
-//===----------------------------------------------------------------------===//
-
-// Move instructions...
-
-def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>;
-def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
-
-def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>;
-def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
-
-def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
-def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
new file mode 100644
index 0000000..f0ea068
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -0,0 +1,1125 @@
+//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the integer arithmetic instructions in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LEA - Load Effective Address
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins i32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions.
+//
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
+ // RAX,RDX = RAX*GR64
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+} // neverHasSideEffects
+
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two-address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+
+
+// unsigned division/remainder
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+ "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+ "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+ "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+ "idiv{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+ "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+ "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// unary instructions
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
+ (implicit EFLAGS)]>;
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src1)),
+ (implicit EFLAGS)]>;
+} // Constraints = "$src1 = $dst"
+
+def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst" in {
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src1))]>;
+}
+} // Constraints = "$src1 = $dst"
+
+def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+
+} // Constraints = "$src1 = $dst"
+
+let CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+// FIXME: What is this for??
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // CodeSize = 2
+
+let Constraints = "$src1 = $dst" in {
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // CodeSize = 2
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bundle of information describing the relevant X86
+/// details for a value type. For example, it can tell you which register
+/// class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ Operand imm8operand, SDPatternOperator imm8operator,
+ bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+ /// since the immediate fields of i64 instructions is a 32-bit sign extended
+ /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+ /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
+ /// extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+ /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+ /// opposed to even) opcode. Operations on i8 are usually even, operations on
+ /// other datatypes are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// HasOpSizePrefix - This bit is set to true if the instruction should have
+ /// the 0x66 operand size prefix. This is set for i16 types.
+ bit HasOpSizePrefix = hasOpSizePrefix;
+
+ /// HasREX_WPrefix - This bit is set to true if the instruction should have
+ /// the 0x40 REX prefix. This is set for i64 types.
+ bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+ Imm8 , i8imm , imm, i8imm , invalid_node,
+ 0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+ Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ 1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+ Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ 1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+ Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ 1, 0, 1>;
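+
+// A rough reading of the records above: an Xi64 operation uses GR64
+// registers, loads through loadi64/i64mem, carries the REX.W prefix, and
+// its immediate form is the 32-bit sign-extended i64i32imm/i64immSExt32
+// rather than a full 64-bit immediate; Xi8 is the only one with an even
+// opcode and no imm8 operator.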
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+/// suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+ let hasREX_WPrefix = typeinfo.HasREX_WPrefix;
+}
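+
+// As an informal example of that inference: a 16-bit "add" built through
+// ITy with Xi16 comes out as "add{w}" with the 0x66 operand-size prefix and
+// an odd low opcode bit, while the same template with Xi8 keeps an even
+// opcode and no prefix; only the type-info argument differs.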
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no eflags) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, reg", where the pattern has
+// just EFLAGS as a result.
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f = MRMDestReg>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ f>;
+
+// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result.
+class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
+// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
+ EFLAGS))]>;
+
+// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : ITy<opcode, MRMSrcReg, typeinfo,
+ (outs typeinfo.RegClass:$dst),
+ (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ // The disassembler should know about this, but not the asmparser.
+ let isCodeGenOnly = 1;
+}
+
+// BinOpRM - Instructions like "add reg, reg, [mem]".
+class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRM_R - Instructions like "add reg, reg, [mem]".
+class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_F - Instructions like "cmp reg, [mem]".
+class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
+
+// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
+ EFLAGS))]>;
+
+// BinOpRI - Instructions like "add reg, reg, imm".
+class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpRI_R - Instructions like "add reg, reg, imm".
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_F - Instructions like "cmp reg, imm".
+class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RF - Instructions like "add reg, reg, imm".
+class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
+
+// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
+ EFLAGS))]>;
+
+// BinOpRI8 - Instructions like "add reg, reg, imm8".
+class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Format f, dag outlist, list<dag> pattern>
+ : ITy<opcode, f, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpRI8_R - Instructions like "add reg, reg, imm8".
+class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_F - Instructions like "cmp reg, imm8".
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs),
+ [(set EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
+
+// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
+ EFLAGS))]>;
+
+// BinOpMR - Instructions like "add [mem], reg".
+class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ list<dag> pattern>
+ : ITy<opcode, MRMDestMem, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+
+// BinOpMR_RMW - Instructions like "add [mem], reg".
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMR_F - Instructions like "cmp [mem], reg".
+class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode>
+ : BinOpMR<opcode, mnemonic, typeinfo,
+ [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>;
+
+// BinOpMI - Instructions like "add [mem], imm".
+class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ : ITy<opcode, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = typeinfo.ImmEncoding;
+}
+
+// BinOpMI_RMW - Instructions like "add [mem], imm".
+class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(store (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI_F - Instructions like "cmp [mem], imm".
+class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
+ SDPatternOperator opnode, Format f, bits<8> opcode = 0x80>
+ : BinOpMI<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)),
+ typeinfo.ImmOperator:$src))],
+ opcode>;
+
+// BinOpMI8 - Instructions like "add [mem], imm8".
+class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
+ Format f, list<dag> pattern>
+ : ITy<0x82, f, typeinfo,
+ (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
+ mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ let ImmT = Imm8; // Always 8-bit immediate.
+}
+
+// BinOpMI8_RMW - Instructions like "add [mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(store (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// BinOpMI8_F - Instructions like "cmp [mem], imm8".
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
+ SDNode opnode, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f,
+ [(set EFLAGS, (opnode (load addr:$dst),
+ typeinfo.Imm8Operator:$src))]>;
+
+// BinOpAI - Instructions like "add %eax, %eax, imm".
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg>
+ : ITy<opcode, RawFrm, typeinfo,
+ (outs), (ins typeinfo.ImmOperand:$src),
+ mnemonic, !strconcat("{$src, %", areg.AsmName, "|%",
+ areg.AsmName, ", $src}"), []> {
+ let ImmT = typeinfo.ImmEncoding;
+ let Uses = [areg];
+ let Defs = [areg];
+}
+
+/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (...".
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnodeflag, SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
+ def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
+ def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
+ def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order-specific; we want the ri8 forms listed first
+ // so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
+ def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
+ def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order-specific; we want the mi8 forms listed first
+ // so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is
+/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and
+/// SBB.
+///
+/// It would be nice to get rid of the second and third argument here, but
+/// tblgen can't handle dependent type references aggressively enough: PR8330
+multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode, bit CommutableRR,
+ bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order-specific; we want the ri8 forms listed first
+ // so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+ } // Constraints = "$src1 = $dst"
+
+ def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order-specific; we want the mi8 forms listed first
+ // so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
+/// defined with "(set EFLAGS, (...". It would be really nice to find a way
+/// to factor this with the other ArithBinOp_*.
+///
+multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
+ string mnemonic, Format RegMRM, Format MemMRM,
+ SDNode opnode,
+ bit CommutableRR, bit ConvertibleToThreeAddress> {
+ let Defs = [EFLAGS] in {
+ let isCommutable = CommutableRR,
+ isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>;
+ } // isCommutable
+
+ def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+
+ def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
+ def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
+ def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>;
+ def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>;
+
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ // NOTE: These are order-specific; we want the ri8 forms listed first
+ // so that they are slightly preferred to the ri forms.
+ def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+
+ def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
+ def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
+ def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
+ def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>;
+ }
+
+ def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>;
+ def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>;
+ def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>;
+ def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>;
+
+ // NOTE: These are order-specific; we want the mi8 forms listed first
+ // so that they are slightly preferred to the mi forms.
+ def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+ def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+ def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+ def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+ def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+ def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+ def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+ def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+ }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+ X86and_flag, and, 1, 0>;
+defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+ X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+ X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+ X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+ X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+ defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+ 1, 0>;
+ defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+ 0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar to AND, except they don't
+// generate a result. From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86cmp (and_su node:$lhs, node:$rhs), 0)>;
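+
+// For illustration: "test %ecx, %ecx" computes the same flags as
+// "and %ecx, %ecx" (OF/CF cleared, SF/ZF/PF from the result) but discards the
+// result, which is exactly what the (X86cmp (and_su ...), 0) fragment above
+// describes.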
+
+let Defs = [EFLAGS] in {
+ let isCommutable = 1 in {
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ } // isCommutable
+
+ def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>;
+
+ def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>;
+ def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>;
+ def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>;
+ def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;
+
+ def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>;
+ def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>;
+ def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>;
+ def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
+
+ def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>;
+ def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>;
+ def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>;
+ def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>;
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
index 2a6a71d..1ea8071 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -56,6 +56,31 @@ struct X86AddressMode {
: BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
+
+
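+  /// getFullAddress - Append this address as the five machine operands of an
+  /// x86 memory reference: base (register or frame index), scale, index
+  /// register, displacement (immediate or global address), and segment
+  /// register (register 0 here, i.e. the default segment).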
+ void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+ assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+
+ if (BaseType == X86AddressMode::RegBase)
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ false, false, false, 0, false));
+ else {
+ assert(BaseType == X86AddressMode::FrameIndexBase);
+ MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+ }
+
+ MO.push_back(MachineOperand::CreateImm(Scale));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+ false, false, false, 0, false));
+
+ if (GV)
+ MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+ else
+ MO.push_back(MachineOperand::CreateImm(Disp));
+
+ MO.push_back(MachineOperand::CreateReg(0, false, false,
+ false, false, false, 0, false));
+ }
};
/// addDirectMem - This function is used to add a direct memory reference to the
@@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB,
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
- else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+ else {
+ assert(AM.BaseType == X86AddressMode::FrameIndexBase);
MIB.addFrameIndex(AM.Base.FrameIndex);
- else
- assert (0);
+ }
+
MIB.addImm(AM.Scale).addReg(AM.IndexReg);
if (AM.GV)
MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
@@ -131,9 +157,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
if (TID.mayStore())
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, Offset,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 0000000..3a43b22
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// CMOV instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1 in {
+ def #NAME#16rr
+ : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ def #NAME#32rr
+ : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ def #NAME#64rr
+ :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ }
+
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ def #NAME#16rm
+ : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ CondNode, EFLAGS))]>, TB, OpSize;
+ def #NAME#32rm
+ : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ def #NAME#64rm
+ :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ CondNode, EFLAGS))]>, TB;
+ } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+ let Uses = [EFLAGS] in {
+ def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+ !strconcat(Mnemonic, "\t$dst"),
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ } // Uses = [EFLAGS]
+}
+
+defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
+defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
+defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
+defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
+defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
+defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
+defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
+defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 0000000..4c915d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1626 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 8 bits.
+ return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction. This expands to code that looks like this:
+// call $next_inst
+// popl %destreg
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+ "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+ (outs),
+ (ins GR8:$al,
+ i64imm:$regsavefi, i64imm:$offset,
+ variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+ [(X86vastart_save_xmm_regs GR8:$al,
+ imm:$regsavefi,
+ imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+ (outs GR64:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR64:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
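+// Roughly, the allocation size is placed in EAX and a call to the probe
+// routine (_chkstk / _alloca, depending on the target) is emitted; the routine
+// touches the new stack area one 4K page at a time.  The exact sequence is
+// target dependent and is produced by the custom inserter.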
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+ [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "",
+ [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with rematerialization
+// in the current register allocator. For now, this is rewritten when the
+// instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly=1,
+ AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, 0)]>;
+
+// Materialize i64 constant where top 32-bits are zero. This could theoretically
+// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
+// that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+ isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type. When
+// this happens, it is great. However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
+ (SETBr)>;
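+
+// For illustration, computing the carry of an unsigned compare:
+//   cmpl %esi, %edi
+//   sbbl %eax, %eax        ; 0 or -1 depending on CF
+//   andl $1, %eax          ; boolean 0/1
+// If only the all-ones/zero mask is needed, the 'and' disappears; if we are
+// left with just the 8-bit sbb+and, the pattern above folds it back to setb.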
+
+//===----------------------------------------------------------------------===//
+// String Pseudo Instructions
+//
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+
+
+// FIXME: Should use "(X86rep_stos AL)" as the pattern.
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// ELF TLS Support
+// All calls clobber the non-callee saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLS_addr32",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLS_addr64",
+ [(X86tlsaddr tls64addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack, on return the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX, EFLAGS],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+// For x86_64, the address of the thunk is passed in %rdi, on return
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX, EFLAGS],
+ Uses = [RSP, RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+ "#CMOV_GR8 PSEUDO!",
+ [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+ imm:$cond, EFLAGS))]>;
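+
+// Schematically, the custom inserter expands
+//   %dst = CMOV_GR8 %t, %f, cond
+// into a small diamond of basic blocks: a conditional branch on EFLAGS, one
+// block supplying %t, one supplying %f, and a join block with a PHI selecting
+// the result (the exact block layout is chosen when the pseudo is expanded).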
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+ "#CMOV_GR32* PSEUDO!",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+ "#CMOV_GR16* PSEUDO!",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ "#CMOV_RFP32 PSEUDO!",
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ "#CMOV_RFP64 PSEUDO!",
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ "#CMOV_RFP80 PSEUDO!",
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst" in
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+
+
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// FIXME: Use normal instructions and add lock prefix dynamically.
+
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+let isCodeGenOnly = 1 in
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
+
+// Optimized codegen when the non-memory output is not used.
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+ (ins i64mem:$dst, i64i32imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+ (ins i64mem:$dst, i64i32imm:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
+ isCodeGenOnly = 1 in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+
+let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+
+let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+}
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions.
+//===----------------------------------------------------------------------===//
+
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+ (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool, GlobalAddress, ExternalSymbol, and JumpTable references, when
+// not in the small code model, should use 'movabs'.  FIXME: This is really a
+// hack; the 'movabs' predicate should handle this sort of thing.
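+// For example, loading the address of a far global requires the full 64-bit
+// immediate form:
+//   movabsq $some_global, %rax      ; MOV64ri, imm64
+// since a sign-extended 32-bit immediate cannot reach arbitrary addresses.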
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with small code model, we can get the address of a label
+// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri64i32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+ (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// If we have the small code model and -static mode, it is safe to store global
+// addresses directly as immediates.  FIXME: This is really a hack; the 'imm'
+// predicate for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+ Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// tailcall stuff
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+ (TCRETURNri GR32_TC:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register which is part of the address mode may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi addr:$dst, imm:$off)>,
+ Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+ (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+ (TCRETURNmi64 addr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+ Instruction Inst64> {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+ (Inst16 GR16:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+ (Inst32 GR32:$src2, addr:$src1)>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+ (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
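+
+// For example, "movzbl (%rdi), %eax" writes all 64 bits of RAX, so a later use
+// of the full register carries no false dependence on its previous contents,
+// whereas "movb (%rdi), %al" would only merge into the low byte.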
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+ sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any instruction that defines a 32-bit result zero-extends it into the high
+// half of the register.  The exceptions below do not: a truncate can be
+// lowered to EXTRACT_SUBREG, CopyFromReg may be copying from a truncate, and
+// x86's cmov doesn't do anything if the condition is false.  Any other 32-bit
+// operation will zero-extend up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg &&
+ N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
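+
+// For example, (i64 (zext (add GR32:$a, GR32:$b))) needs no extra instruction:
+//   addl %esi, %edi          ; already zeroes bits 63..32 of RDI
+// and the SUBREG_TO_REG above merely reinterprets the 32-bit result as i64.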
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node as an 'add' node if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
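+
+// For example, or_is_add matches (or %x, 7) when the low bits of %x are known
+// to be zero, since then (or %x, 7) == (add %x, 7); it can be selected as
+// ADD32ri8_DB and later folded into an LEA, while still being printed as an
+// 'or' if it is never 3-addressified.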
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before selecting to OR
+
+let isConvertibleToThreeAddress = 1,
+ Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "", // orw/addw REG, REG
+ [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "", // orl/addl REG, REG
+ [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "", // orq/addq REG, REG
+ [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order-specific; we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "", // orw/addw REG, imm8
+ [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "", // orw/addw REG, imm
+ [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "", // orl/addl REG, imm8
+ [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "", // orl/addl REG, imm
+ [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "", // orq/addq REG, imm8
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "", // orq/addq REG, imm
+ [(set GR64:$dst, (or_is_add GR64:$src1,
+ i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
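+
+// For example, "addl $128, %eax" needs a 32-bit immediate (05 id / 81 /0 id),
+// while the equivalent "subl $-128, %eax" fits the sign-extended 8-bit form
+// (83 /5 ib) and saves a couple of bytes.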
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of an 8-bit constant, use that.
+
+def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri8
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo8XForm imm:$imm))),
+ sub_32bit)>;
+
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
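+
+// For example, (and GR64:$x, 0x00000000fffffff0): the 64-bit 'andq' imm32 form
+// would sign-extend the immediate (to 0xfffffffffffffff0), so the constant
+// would otherwise have to be materialized in a register; instead
+//   andl $-16, %edi          ; AND32ri8, upper 32 bits implicitly zeroed
+// wrapped in SUBREG_TO_REG gives the desired i64 result.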
+
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
+ GR16_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
+ Requires<[In64BitMode]>;
+
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit))>,
+ Requires<[In32BitMode]>;
+
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
+ Requires<[In64BitMode]>;
+
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
+ GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
+
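+// The hardware constraint being worked around, with a rough example: an
+// instruction carrying a REX prefix cannot encode AH/BH/CH/DH (REX reuses
+// those encodings for SPL/BPL/SIL/DIL), so "movzbl %ah, %r8d" is not
+// encodable while "movzbl %ah, %ecx" is. The _NOREX instruction forms used
+// below constrain the non-h-register operands so that no REX prefix is ever
+// required.
+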
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
+ (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
+ GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi)),
+ sub_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
+ sub_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
+ sub_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
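+
+// For illustration: "addl %eax, %eax" computes the same value as
+// "shll $1, %eax"; the add form is generally at least as fast, commutes, and
+// can often be converted to a three-address LEA when the destination must
+// differ from the source.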
+
+// (shl x (and y, 31)) ==> (shl x, y)
+def : Pat<(shl GR8:$src1, (and CL, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
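+
+// Why the mask can be dropped (illustrative): for 8/16/32-bit shifts the
+// hardware already truncates the count in %cl modulo 32, so
+// "andb $31, %cl; shll %cl, %eax" and "shll %cl, %eax" behave identically
+// and the explicit and is redundant. The 64-bit patterns further down rely
+// on the analogous modulo-64 truncation.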
+
+def : Pat<(srl GR8:$src1, (and CL, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+
+// (anyext (setcc_carry)) -> (setcc_carry)
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
+ (SETB_C32r)>;
+
+
+
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(sub GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(mul GR32:$src1, GR32:$src2),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(mul GR32:$src1, imm:$src2),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply by 2 with EFLAGS result.
+let AddedComplexity = 2 in {
+def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// Increment reg.
+def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+
+// Decrement reg.
+def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (OR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (OR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (OR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (OR64rm GR64:$src1, addr:$src2)>;
+
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
+ (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
+ (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
+ (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (XOR8rm GR8:$src1, addr:$src2)>;
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (XOR16rm GR16:$src1, addr:$src2)>;
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (XOR32rm GR32:$src1, addr:$src2)>;
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (XOR64rm GR64:$src1, addr:$src2)>;
+
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
+ (XOR8ri GR8:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, imm:$src2),
+ (XOR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(xor GR32:$src1, imm:$src2),
+ (XOR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
+ (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
+ (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
+ (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (AND8rm GR8:$src1, addr:$src2)>;
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (AND16rm GR16:$src1, addr:$src2)>;
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (AND32rm GR32:$src1, addr:$src2)>;
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (AND64rm GR64:$src1, addr:$src2)>;
+
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
+ (AND8ri GR8:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, imm:$src2),
+ (AND16ri GR16:$src1, imm:$src2)>;
+def : Pat<(and GR32:$src1, imm:$src2),
+ (AND32ri GR32:$src1, imm:$src2)>;
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
+ (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
+ (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
+ (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
new file mode 100644
index 0000000..77f4725
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -0,0 +1,294 @@
+//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 jump, return, call, and related instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag timm:$amt)]>;
+ def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "retw\t$amt",
+ []>, OpSize;
+ def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ "lretl", []>;
+ def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ "lretq", []>;
+ def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lret\t$amt", []>;
+ def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "lretw\t$amt", []>, OpSize;
+}
+
+// Unconditional branches.
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp\t$dst", [(br bb:$dst)]>;
+ def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst", []>;
+ def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
+ "jmp{q}\t$dst", []>;
+}
+
+// Conditional Branches.
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
+ multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
+ [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ }
+}
+
+defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
+defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
+defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
+defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
+defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
+defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
+defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
+defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
+defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
+defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
+defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
+defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
+defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
+defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
+defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
+defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+
+// jcx/jecx/jrcx instructions.
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+ // These are the 32-bit versions of this instruction for the asm parser. In
+ // 32-bit mode, the address-size-prefixed form is jcxz and the unprefixed
+ // form is jecxz.
+ let Uses = [CX] in
+ def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ let Uses = [ECX] in
+ def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+ // J*CXZ instructions: 64-bit versions of this instruction for the asm
+ // parser. In 64-bit mode, the address-size-prefixed form is jecxz and the
+ // unprefixed form is jrcxz.
+ let Uses = [ECX] in
+ def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ let Uses = [RCX] in
+ def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+ "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
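+
+// Rough encoding summary (illustrative): all of the instructions above share
+// opcode 0xE3; the register actually tested (CX, ECX, or RCX) is selected by
+// the effective address size, which the 0x67 prefix (AdSize) toggles
+// relative to the current mode.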
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+ "ljmp{q}\t{*}$dst", []>;
+
+ def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+ "ljmp{w}\t{*}$dst", []>, OpSize;
+ def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+ "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ Requires<[In32BitMode]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ Requires<[In32BitMode]>;
+
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+ def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+ "lcall{w}\t{*}$dst", []>, OpSize;
+ def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+ "lcall{l}\t{*}$dst", []>;
+
+ // callw for 16 bit code for the assembler.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
+ }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def TCRETURNdi : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+ def TCRETURNri : PseudoI<(outs),
+ (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi : PseudoI<(outs),
+ (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ // FIXME: These should be pseudo instructions that are lowered when going to
+ // MCInst.
+ def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL",
+ []>;
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+ "", []>; // FIXME: Remove encoding when JIT is dead.
+ let mayLoad = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+ "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
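+ // For example, a direct call to an absolute address such as 0x12345678 can
+ // only use the E8 rel32 form if the target lies within +/-2GB of the call
+ // site, which is not knowable at selection time; such calls are instead
+ // lowered by materializing the address in a register and using CALL64r.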
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[In64BitMode, NotWin64]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[In64BitMode, NotWin64]>;
+
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", []>;
+ }
+
+ // FIXME: We need to teach codegen about a single list of call-clobbered
+ // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs),
+ (ins i64mem:$dst,variable_ops),
+ "call{q}\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>,
+ Requires<[IsWin64]>;
+ }
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP],
+ usesCustomInserter = 1 in {
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+ let mayLoad = 1 in
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+ def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>;
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+
+ let mayLoad = 1 in
+ def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+ "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrExtension.td b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 0000000..867c0f8
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,172 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+
+
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update. Actual movsbw included for the disassembler.
+def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in {
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+}
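+
+// Why the 32-bit form is preferred (rough comparison): "movsbw %al, %cx"
+// needs the 0x66 operand-size prefix and writes only the low 16 bits of
+// %ecx, leaving a false dependency on the register's old upper bits, while
+// "movsbl %al, %ecx" is one byte shorter and overwrites the whole register.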
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update. Actual movzbw included for the disassembler.
+def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in {
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
+}
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ []>, TB;
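+
+// Concretely (illustrative): an h-register source such as %bh shares its
+// encoding with %dil once any REX prefix is present, so the zero-extend's
+// destination must avoid registers that require REX (r8d-r15d); GR32_NOREX
+// expresses exactly that restriction to the register allocator.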
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// movzbq and movzwq encodings for the disassembler
+def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+
+// FIXME: These should be Pat patterns.
+let isCodeGenOnly = 1 in {
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension; however, this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index 9c9bcc7..b506f5e 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -32,21 +32,24 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
- [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
- [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+ SDNPMemOperand]>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
- [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
@@ -70,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
// Some 'special' instructions
let usesCustomInserter = 1 in { // Expanded after instruction selection.
- def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP32:$src),
- "##FP32_TO_INT16_IN_MEM PSEUDO!",
+ def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP32:$src),
- "##FP32_TO_INT32_IN_MEM PSEUDO!",
+ def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
- def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP32:$src),
- "##FP32_TO_INT64_IN_MEM PSEUDO!",
+ def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
- def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP64:$src),
- "##FP64_TO_INT16_IN_MEM PSEUDO!",
+ def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP64:$src),
- "##FP64_TO_INT32_IN_MEM PSEUDO!",
+ def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
- def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP64:$src),
- "##FP64_TO_INT64_IN_MEM PSEUDO!",
+ def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
- def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
- (outs), (ins i16mem:$dst, RFP80:$src),
- "##FP80_TO_INT16_IN_MEM PSEUDO!",
+ def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
- (outs), (ins i32mem:$dst, RFP80:$src),
- "##FP80_TO_INT32_IN_MEM PSEUDO!",
+ def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
- def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
- (outs), (ins i64mem:$dst, RFP80:$src),
- "##FP80_TO_INT64_IN_MEM PSEUDO!",
+ def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
@@ -212,11 +197,11 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
[(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> {
+ !strconcat("f", asmstring, "{s}\t$src")> {
let mayLoad = 1;
}
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> {
+ !strconcat("f", asmstring, "{l}\t$src")> {
let mayLoad = 1;
}
// ST(0) = ST(0) + [memint]
@@ -245,11 +230,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i32)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> {
+ !strconcat("fi", asmstring, "{s}\t$src")> {
let mayLoad = 1;
}
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> {
+ !strconcat("fi", asmstring, "{l}\t$src")> {
let mayLoad = 1;
}
}
@@ -580,16 +565,16 @@ def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg),
- "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+ "fucomi\t$reg">, DB;
def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg),
- "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+ "fucompi\t$reg">, DF;
}
def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+ "fcomi\t$reg">, DB;
def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
- "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+ "fcompi\t$reg">, DF;
// Floating point flag ops.
let Defs = [AX] in
@@ -604,8 +589,8 @@ let mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
-// Register free
-
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
"ffree\t$reg">, DD;
@@ -613,7 +598,8 @@ def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
-// Operandless floating-point instructions for the disassembler
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
@@ -639,8 +625,12 @@ def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+ "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 79187e9..344c14c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -39,7 +39,8 @@ def MRM_E8 : Format<39>;
def MRM_F0 : Format<40>;
def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
-def RawFrmImm16 : Format<43>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -108,6 +109,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -123,6 +125,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
dag InOperandList = ins;
string AsmString = AsmStr;
+ // If this is a pseudo instruction, mark it isCodeGenOnly.
+ let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
//
// Attributes specific to X86 instructions...
//
@@ -130,17 +135,18 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
bits<4> Prefix = 0; // Which prefix byte does this inst have?
- bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix?
+ bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
- bit hasVEXPrefix = 0; // Does this inst requires a VEX prefix?
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
- bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field?
- bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in an immediate field?
- bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers?
+ bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
+ bit has3DNow0F0FOpcode = 0; // Wacky 3DNow! encoding?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -159,6 +165,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{34} = hasVEX_4VPrefix;
let TSFlags{35} = hasVEX_i8ImmReg;
let TSFlags{36} = hasVEX_L;
+ let TSFlags{37} = has3DNow0F0FOpcode;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 01149b6..5016c0f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -15,51 +15,8 @@
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
-def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
-
-def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
-def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
-def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
-def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
-
-//===----------------------------------------------------------------------===//
-// MMX Masks
-//===----------------------------------------------------------------------===//
-
-// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
-// PSHUFW imm.
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
-def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
-def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
-def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
-def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], MMX_SHUFFLE_get_shuf_imm>;
+def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
@@ -86,6 +43,21 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
+def X86pandn : SDNode<"X86ISD::PANDN",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pblendv : SDNode<"X86ISD::PBLENDVB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
@@ -102,7 +74,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
@@ -134,18 +106,12 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
-def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
- SDTCisInt<2>]>;
-
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
-def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
-
def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
@@ -187,9 +153,11 @@ def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
// the top elements. These are used for the SSE 'ss' and 'sd' instruction
// forms.
def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
+ SDNPWantRoot]>;
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
@@ -273,6 +241,7 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
// 256-bit memop pattern fragments
@@ -289,10 +258,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
-def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
// MOVNT Support
// Like 'store', but requires the non-temporal bit to be set
@@ -376,6 +342,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePALIGNRImmediate(N));
}]>;
+// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
+// to VEXTRACTF128 imm.
+def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
+ return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N));
+}]>;
+
+// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to
+// VINSERTF128 imm.
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+ return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -466,3 +444,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+ (extract_subvector node:$bigvec,
+ node:$index), [{
+ return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+ node:$index),
+ (insert_subvector node:$bigvec, node:$smallvec,
+ node:$index), [{
+ return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5280940..ceb1b65 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
-
#include <limits>
using namespace llvm;
@@ -55,7 +54,11 @@ ReMatPICStubLoad("remat-pic-stub-load",
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
TM(tm), RI(tm, *this) {
- SmallVector<unsigned,16> AmbEntries;
+ enum {
+ TB_NOT_REVERSABLE = 1U << 31,
+ TB_FLAGS = TB_NOT_REVERSABLE
+ };
+
static const unsigned OpTbl2Addr[][2] = {
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
@@ -65,13 +68,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ADC64rr, X86::ADC64mr },
{ X86::ADD16ri, X86::ADD16mi },
{ X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE },
+ { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
{ X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
{ X86::ADD32ri, X86::ADD32mi },
{ X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
{ X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
{ X86::ADD64ri32, X86::ADD64mi32 },
{ X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
{ X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
{ X86::ADD8ri, X86::ADD8mi },
{ X86::ADD8rr, X86::ADD8mr },
{ X86::AND16ri, X86::AND16mi },
@@ -216,16 +228,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,0))).second)
- assert(false && "Duplicated entries?");
+ unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+ assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+  // If this is not a reversable operation (because there is a many->one
+  // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load and store, no alignment requirement.
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp,
- AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
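
Each of the table-initialization loops in this constructor follows the same pattern: the memory-form column of an entry may carry TB_NOT_REVERSABLE in its top bit, the bit is masked off with ~TB_FLAGS before the folding entry is stored, and the raw bit decides whether a reverse (unfolding) entry is added. A condensed, self-contained sketch of that scheme, with std::map standing in for the DenseMaps used here and the second pair member standing in for the alignment/AuxInfo value:

    #include <cassert>
    #include <map>
    #include <utility>

    enum { TB_NOT_REVERSABLE = 1U << 31, TB_FLAGS = TB_NOT_REVERSABLE };
    typedef std::map<unsigned, std::pair<unsigned, unsigned> > OpcodeMap;

    // Tbl[i][0] = register-form opcode, Tbl[i][1] = memory-form opcode,
    // possibly with TB_NOT_REVERSABLE OR'd in when several register forms
    // fold to the same memory form.
    static void buildTables(const unsigned Tbl[][2], unsigned NumEntries,
                            OpcodeMap &Fold, OpcodeMap &Unfold) {
      for (unsigned i = 0; i != NumEntries; ++i) {
        unsigned RegOp = Tbl[i][0];
        unsigned MemOp = Tbl[i][1] & ~TB_FLAGS;   // strip the flag bits
        assert(!Fold.count(RegOp) && "Duplicated entries?");
        Fold[RegOp] = std::make_pair(MemOp, 0U);  // folding is always recorded

        if (Tbl[i][1] & TB_NOT_REVERSABLE)        // many->one: unfolding would
          continue;                               // be ambiguous, so skip it
        assert(!Unfold.count(MemOp) && "Duplicated entries in unfolding maps?");
        Unfold[MemOp] = std::make_pair(RegOp, 0U);
      }
    }
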
// If the third value is 1, then it's folding either a load or a store.
@@ -252,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV64r, X86::DIV64m, 1, 0 },
{ X86::DIV8r, X86::DIV8m, 1, 0 },
{ X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
- { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
{ X86::IDIV16r, X86::IDIV16m, 1, 0 },
{ X86::IDIV32r, X86::IDIV32m, 1, 0 },
{ X86::IDIV64r, X86::IDIV64m, 1, 0 },
@@ -268,7 +285,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOV16rr, X86::MOV16mr, 0, 0 },
{ X86::MOV32ri, X86::MOV32mi, 0, 0 },
{ X86::MOV32rr, X86::MOV32mr, 0, 0 },
- { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 },
{ X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
{ X86::MOV64rr, X86::MOV64mr, 0, 0 },
{ X86::MOV8ri, X86::MOV8mi, 0, 0 },
@@ -312,19 +328,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Align = OpTbl0[i][3];
- if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
unsigned FoldedLoad = OpTbl0[i][2];
+ unsigned Align = OpTbl0[i][3];
+ assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+ RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 0, folded load or store.
unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl1[][3] = {
@@ -342,8 +361,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm, 0 },
- { X86::FsMOVAPSrr, X86::MOVSSrm, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
{ X86::IMUL16rri, X86::IMUL16rmi, 0 },
{ X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
{ X86::IMUL32rri, X86::IMUL32rmi, 0 },
@@ -360,8 +379,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
@@ -370,8 +389,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
{ X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
@@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
{ X86::MOV16rr, X86::MOV16rm, 0 },
{ X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
{ X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
{ X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
@@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
+ unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl1[i][2];
- if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+ assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+ RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 1, folded load
unsigned AuxInfo = 1 | (1 << 4);
- if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
static const unsigned OpTbl2[][3] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
+ { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD32rr, X86::ADD32rm, 0 },
+ { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD64rr, X86::ADD64rm, 0 },
+ { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
{ X86::ADD8rr, X86::ADD8rm, 0 },
{ X86::ADDPDrr, X86::ADDPDrm, 16 },
{ X86::ADDPSrr, X86::ADDPSrm, 16 },
@@ -652,20 +676,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
+ unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
unsigned Align = OpTbl2[i][2];
- if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
- std::make_pair(MemOp,Align))).second)
- assert(false && "Duplicated entries?");
+
+ assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+ RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+ if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+ continue;
+
// Index 2, folded load
unsigned AuxInfo = 2 | (1 << 4);
- if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
- std::make_pair(RegOp, AuxInfo))).second)
- AmbEntries.push_back(MemOp);
+ assert(!MemOp2RegOpTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
}
-
- // Remove ambiguous entries.
- assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
bool
@@ -745,9 +772,7 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
- case X86::MOV32rm_TC:
case X86::MOV64rm:
- case X86::MOV64rm_TC:
case X86::LD_Fp64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@@ -768,9 +793,7 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOV8mr:
case X86::MOV16mr:
case X86::MOV32mr:
- case X86::MOV32mr_TC:
case X86::MOV64mr:
- case X86::MOV64mr_TC:
case X86::ST_FpP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@@ -785,7 +808,7 @@ static bool isFrameStoreOpcode(int Opcode) {
return false;
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -793,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return 0;
}
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode())) {
unsigned Reg;
@@ -923,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
isPICBase = true;
}
return isPICBase;
- }
+ }
return false;
}
-
+
case X86::LEA32r:
case X86::LEA64r: {
if (MI->getOperand(2).isImm() &&
@@ -1099,11 +1122,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
? X86::LEA64_32r : X86::LEA32r;
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
+
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+  // we'll be shifting and then extracting the lower 16 bits.
// This has the potential to cause partial register stall. e.g.
// movw (%rbp,%rcx,2), %dx
// leal -65(%rdx), %esi
@@ -1137,9 +1160,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
break;
case X86::ADD16ri:
case X86::ADD16ri8:
- addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
unsigned leaInReg2 = 0;
@@ -1149,9 +1175,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// just a single insert_subreg.
addRegReg(MIB, leaInReg, true, leaInReg, false);
} else {
- leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
+      // we'll be shifting and then extracting the lower 16 bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
@@ -1218,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
-
+
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
if (B != C) return 0;
@@ -1236,6 +1262,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+ return 0;
+
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
@@ -1250,6 +1281,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
+ // LEA can't handle ESP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+ return 0;
+
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
@@ -1288,6 +1324,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1310,6 +1354,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src) &&
+ !MF.getRegInfo().constrainRegClass(Src,
+ MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+ X86::GR32_NOSPRegisterClass))
+ return 0;
+
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1327,12 +1378,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Src, isKill, -1);
break;
case X86::ADD64rr:
- case X86::ADD32rr: {
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
- : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+ Opc = X86::LEA64r;
+ RC = X86::GR64_NOSPRegisterClass;
+ } else {
+ Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ RC = X86::GR32_NOSPRegisterClass;
+ }
+
+
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
+
+ // LEA can't handle RSP.
+ if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+ !MF.getRegInfo().constrainRegClass(Src2, RC))
+ return 0;
+
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
@@ -1341,7 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Src2, MI, NewMI);
break;
}
- case X86::ADD16rr: {
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB: {
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1357,6 +1426,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::ADD64ri32:
case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define |
@@ -1364,7 +1435,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8: {
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1375,6 +1448,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::ADD16ri:
case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1396,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Dest, MI, NewMI);
}
- MFI->insert(MBBI, NewMI); // Insert the new inst
+ MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
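
Several of the new early-outs in convertToThreeAddress hinge on MachineRegisterInfo::constrainRegClass: before an ADD/INC/DEC/SHL is rewritten into an LEA, the source virtual register is constrained to a *_NOSP class, because the stack pointer cannot appear in an LEA address operand. The following is only a toy model of that narrowing step, treating a register class as a plain set of register names; the real API walks the target's register-class hierarchy and returns the narrowed class or null.

    #include <set>
    #include <string>

    struct ToyRegClass {
      std::set<std::string> Regs;   // e.g. GR32 vs. GR32_NOSP (no ESP)
    };

    // Narrow Current to the registers allowed by both classes; fail if the
    // intersection is empty.  A failed constraint is the signal to abandon
    // the LEA rewrite and keep the original two-address instruction.
    static bool constrainRegClass(ToyRegClass &Current,
                                  const ToyRegClass &Required) {
      std::set<std::string> Common;
      for (std::set<std::string>::const_iterator I = Current.Regs.begin(),
             E = Current.Regs.end(); I != E; ++I)
        if (Required.Regs.count(*I))
          Common.insert(*I);
      if (Common.empty())
        return false;
      Current.Regs.swap(Common);    // register keeps the narrowed class
      return true;
    }
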
@@ -1617,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
-
+
// Conditional branch is a special case.
if (TID.isBranch() && !TID.isBarrier())
return true;
@@ -1626,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
++Count;
}
-
+
return Count;
}
@@ -1945,13 +2020,23 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
default:
llvm_unreachable("Unknown regclass");
case X86::GR64RegClassID:
+ case X86::GR64_ABCDRegClassID:
+ case X86::GR64_NOREXRegClassID:
+ case X86::GR64_NOREX_NOSPRegClassID:
case X86::GR64_NOSPRegClassID:
+ case X86::GR64_TCRegClassID:
+ case X86::GR64_TCW64RegClassID:
return load ? X86::MOV64rm : X86::MOV64mr;
case X86::GR32RegClassID:
- case X86::GR32_NOSPRegClassID:
+ case X86::GR32_ABCDRegClassID:
case X86::GR32_ADRegClassID:
+ case X86::GR32_NOREXRegClassID:
+ case X86::GR32_NOSPRegClassID:
+ case X86::GR32_TCRegClassID:
return load ? X86::MOV32rm : X86::MOV32mr;
case X86::GR16RegClassID:
+ case X86::GR16_ABCDRegClassID:
+ case X86::GR16_NOREXRegClassID:
return load ? X86::MOV16rm : X86::MOV16mr;
case X86::GR8RegClassID:
// Copying to or from a physical H register on x86-64 requires a NOREX
@@ -1961,32 +2046,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_ABCDRegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32_ABCDRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16_ABCDRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
case X86::GR8_ABCD_LRegClassID:
+ case X86::GR8_NOREXRegClassID:
return load ? X86::MOV8rm :X86::MOV8mr;
case X86::GR8_ABCD_HRegClassID:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_NOREXRegClassID:
- case X86::GR64_NOREX_NOSPRegClassID:
- return load ? X86::MOV64rm : X86::MOV64mr;
- case X86::GR32_NOREXRegClassID:
- return load ? X86::MOV32rm : X86::MOV32mr;
- case X86::GR16_NOREXRegClassID:
- return load ? X86::MOV16rm : X86::MOV16mr;
- case X86::GR8_NOREXRegClassID:
- return load ? X86::MOV8rm : X86::MOV8mr;
- case X86::GR64_TCRegClassID:
- return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
- case X86::GR32_TCRegClassID:
- return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
case X86::RFP80RegClassID:
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case X86::RFP64RegClassID:
@@ -2085,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
NewMIs.push_back(MIB);
}
-bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned SlotSize = is64Bit ? 8 : 4;
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
- for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, &RI);
- }
- }
-
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
- return true;
-}
-
-bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL = MBB.findDebugLoc(MI);
-
- MachineFunction &MF = *MBB.getParent();
- unsigned FPReg = RI.getFrameRegister(MF);
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
- bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
- unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- BuildMI(MBB, MI, DL, get(Opc), Reg);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, &RI);
- }
- }
- return true;
-}
-
MachineInstr*
X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -2181,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MIB.addOperand(MOs[i]);
if (NumAddrOps < 4) // FrameIndex only
addOffset(MIB, 0);
-
+
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = MI->getDesc().getNumOperands()-2;
for (unsigned i = 0; i != NumOps; ++i) {
@@ -2202,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
MachineInstrBuilder MIB(NewMI);
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
@@ -2238,7 +2235,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
@@ -2251,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (isTwoAddr && NumOps >= 2 && i < 2 &&
MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() &&
- MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
@@ -2265,19 +2262,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
return NewMI;
-
+
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (i == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (i == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
+
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ OpcodeTablePtr->find(MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
unsigned MinAlign = I->second.second;
@@ -2320,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NewMI;
}
}
-
- // No fusion
+
+ // No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
@@ -2332,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2343,8 +2340,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2384,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return NULL;
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2395,8 +2392,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2424,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
+ case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
+ case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2490,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS)
+ if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -2525,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const {
- // Check switch flag
+ // Check switch flag
if (NoFusing) return 0;
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
switch (MI->getOpcode()) {
default: return false;
- case X86::TEST8rr:
+ case X86::TEST8rr:
case X86::TEST16rr:
case X86::TEST32rr:
case X86::TEST64rr:
@@ -2551,16 +2550,15 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
- if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
- case X86::MOV64r0:
- return true;
+ case X86::MOV64r0: return true;
default: break;
}
OpcodeTablePtr = &RegOp2MemOpTable0;
@@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
-
- if (OpcodeTablePtr) {
- // Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- OpcodeTablePtr->find((unsigned*)Opc);
- if (I != OpcodeTablePtr->end())
- return true;
- }
+
+ if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+ return true;
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
@@ -2644,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the data processing instruction.
MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
MachineInstrBuilder MIB(DataMI);
-
+
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
@@ -2712,8 +2705,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
unsigned Opc = I->second.first;
@@ -2813,8 +2806,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
- MemOp2RegOpTable.find((unsigned*)Opc);
+ DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+ MemOp2RegOpTable.find(Opc);
if (I == MemOp2RegOpTable.end())
return 0;
bool FoldedLoad = I->second.second & (1 << 4);
@@ -2993,6 +2986,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
return true;
}
return false;
@@ -3090,6 +3085,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ switch (DefMI->getOpcode()) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -3108,6 +3138,13 @@ namespace {
if (TM->getRelocationModel() != Reloc::PIC_)
return false;
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -3119,16 +3156,15 @@ namespace {
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
else
- PC = TII->getGlobalBaseReg(&MF);
-
+ PC = GlobalBaseReg;
+
// Operand of MovePCtoStack is completely ignored by asm printer. It's
// only used in JIT code emission as displacement to pc.
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
+
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
.addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index f336206..1d44207 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -174,7 +174,7 @@ namespace X86II {
/// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$stub" symbol. This is used for calls
- /// and jumps to external functions on Tiger and before.
+ /// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB,
/// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
@@ -311,12 +311,17 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+
+ /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+ /// immediates, the first of which is a 16-bit immediate (specified by
+    /// immediates, the first of which is a 16-bit immediate (specified by
+    /// the imm encoding) and the second is an 8-bit fixed value.
+ RawFrmImm8 = 43,
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
/// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 43,
+ RawFrmImm16 = 44,
FormMask = 63,
@@ -444,28 +449,36 @@ namespace X86II {
OpcodeMask = 0xFF << OpcodeShift,
//===------------------------------------------------------------------===//
- // VEX - The opcode prefix used by AVX instructions
+ /// VEX - The opcode prefix used by AVX instructions
VEX = 1U << 0,
- // VEX_W - Has a opcode specific functionality, but is used in the same
- // way as REX_W is for regular SSE instructions.
+    /// VEX_W - Has opcode-specific functionality, but is used in the same
+ /// way as REX_W is for regular SSE instructions.
VEX_W = 1U << 1,
- // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
- // address instructions in SSE are represented as 3 address ones in AVX
- // and the additional register is encoded in VEX_VVVV prefix.
+ /// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
+ /// address instructions in SSE are represented as 3 address ones in AVX
+ /// and the additional register is encoded in VEX_VVVV prefix.
VEX_4V = 1U << 2,
- // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
- // must be encoded in the i8 immediate field. This usually happens in
- // instructions with 4 operands.
+    /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ /// must be encoded in the i8 immediate field. This usually happens in
+ /// instructions with 4 operands.
VEX_I8IMM = 1U << 3,
- // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
- // instruction uses 256-bit wide registers. This is usually auto detected if
- // a VR256 register is used, but some AVX instructions also have this field
- // marked when using a f256 memory references.
- VEX_L = 1U << 4
+ /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ /// instruction uses 256-bit wide registers. This is usually auto detected
+ /// if a VR256 register is used, but some AVX instructions also have this
+    /// field marked when using an f256 memory reference.
+ VEX_L = 1U << 4,
+
+ /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+ /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+ /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+ /// storing a classifier in the imm8 field. To simplify our implementation,
+    /// we handle this by storing the classifier in the opcode field and using
+ /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+ Has3DNow0F0FOpcode = 1U << 5
};
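
Concretely, every 3DNow! instruction is encoded as 0F 0F /r ib, with the trailing imm8 selecting the operation (0x9E is PFADD, for example). A rough sketch of the byte layout this flag asks the encoder to produce; the helper names below are illustrative and not the real MC emitter API.

    #include <cstdint>
    #include <vector>

    // Hypothetical helper: append one byte to the instruction buffer.
    static void emitByte(std::vector<uint8_t> &Buf, uint8_t B) { Buf.push_back(B); }

    // Shape of a register-form 3DNow! instruction: two 0x0F escape bytes, the
    // ModRM byte for the operands, then the classifier (the value kept in the
    // opcode field when Has3DNow0F0FOpcode is set) emitted last as an imm8.
    static void emit3DNowInst(std::vector<uint8_t> &Buf, uint8_t ModRM,
                              uint8_t Classifier) {
      emitByte(Buf, 0x0F);
      emitByte(Buf, 0x0F);
      emitByte(Buf, ModRM);
      emitByte(Buf, Classifier);   // e.g. 0x9E selects PFADD
    }
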
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -528,6 +541,7 @@ namespace X86II {
case X86II::AddRegFrm:
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
+ case X86II::RawFrmImm8:
case X86II::RawFrmImm16:
return -1;
case X86II::MRMDestMem:
@@ -599,14 +613,14 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+ DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
- DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+ DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
public:
explicit X86InstrInfo(X86TargetMachine &tm);
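
The second member of each MemOp2RegOpTable entry is the AuxInfo value built in the constructor loops: the folded operand index in the low bits, bit 4 set if a load was folded, bit 5 set if a store was folded (the `& (1 << 4)` test in unfoldMemoryOperand reads it back). A small, illustrative decoder, with names that are not from the source:

    #include <cstdio>

    // Decode the packed AuxInfo value stored alongside the register-form
    // opcode (low bits = folded operand index, bit 4 = a load was folded,
    // bit 5 = a store was folded).
    static void decodeAuxInfo(unsigned AuxInfo, unsigned &OpNum,
                              bool &FoldedLoad, bool &FoldedStore) {
      OpNum       = AuxInfo & 0xF;
      FoldedLoad  = (AuxInfo & (1u << 4)) != 0;
      FoldedStore = (AuxInfo & (1u << 5)) != 0;
    }

    int main() {
      unsigned OpNum; bool Ld, St;
      decodeAuxInfo(0 | (1 << 4) | (1 << 5), OpNum, Ld, St); // an OpTbl0 entry
      std::printf("op=%u load=%d store=%d\n", OpNum, Ld, St); // op=0 load=1 store=1
      return 0;
    }
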
@@ -728,17 +742,6 @@ public:
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx, uint64_t Offset,
@@ -845,18 +848,23 @@ public:
/// SetSSEDomain - Set the SSEDomain of MI.
void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ unsigned Size, unsigned Alignment) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Alignment) const;
-
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 09b7721..87dc4be 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1,10 +1,10 @@
-//===----------------------------------------------------------------------===//
-//
+//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instruction set, defining the instructions, and
@@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+// RES1, RES2, FLAGS = op LHS, RHS
+def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@@ -46,7 +60,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2,
[SDTCisInt<0>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
-def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -64,6 +78,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
SDTCisVT<1, iPTR>,
SDTCisVT<2, iPTR>]>;
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i8>,
+ SDTCisVT<4, i32>]>;
+
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def SDTX86Void : SDTypeProfile<0, 0, []>;
@@ -72,9 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-
-def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -110,82 +128,85 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
- SDNPMayLoad]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
+ [SDNPHasChain, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+ SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
def X86callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def X86callseq_end :
SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>;
def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
- SDT_X86SegmentBaseAddress, []>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
-def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
- [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
[SDNPCommutative]>;
-
+def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
+def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
+
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
@@ -197,11 +218,11 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
- [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
- []>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -252,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+// GPRs available for tailcall.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
@@ -261,6 +286,15 @@ def i32mem_TC : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+ let PrintMethod = "printi64mem";
+ let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+ ptr_rc_tailcall, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
let ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
@@ -332,43 +366,77 @@ def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
}
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+ let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64-bits but only 8 bits are significant.
+def i64i8imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
//
// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+ [add, sub, mul, X86mul_imm, shl, or, frameindex,
+ X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-// FIXME: temporary hack to let codegen assert or generate poor code in case
-// no AVX version of the desired intructions is present, this is better for
-// incremental dev (without fallbacks it's easier to spot what's missing)
-def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def Has3DNow : Predicate<"Subtarget->has3DNow()">;
+def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+
+def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
-def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
-def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
-def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
-def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
+def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -383,7 +451,6 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
-def HasAES : Predicate<"Subtarget->hasAES()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -418,40 +485,24 @@ def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-
-/// Load patterns: these constraint the match to the right address space.
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 256;
- return false;
+def i64immSExt8 : PatLeaf<(i64 immSext8)>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // unsigned field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
}]>;
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 257;
- return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+ uint64_t v = N->getZExtValue();
+ return v == (uint32_t)v && (int32_t)v == (int8_t)v;
}]>;
-
// Helper fragments for loads.
// It's always safe to treat a anyext i16 load as a i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -462,10 +513,6 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::EXTLOAD)
return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -474,10 +521,6 @@ def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD)
return true;
@@ -486,15 +529,18 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
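The widening rule these load fragments encode, per the comment above, is: a non-extending load always matches, and an any-extending load may be matched at the wider width only when it is sufficiently aligned and not volatile. A C++ sketch of that decision, using a hypothetical LoadInfo struct as a stand-in for LLVM's LoadSDNode (illustration only):

#include <cstdio>

// Hypothetical stand-in for the fields the TableGen predicates query on
// LoadSDNode; not an LLVM type.
struct LoadInfo {
  enum ExtKind { NonExtLoad, AnyExtLoad, SignExtLoad, ZeroExtLoad } Ext;
  unsigned AlignBytes;
  bool Volatile;
};

// Mirrors the loadi32 fragment: a non-extending i32 load always matches;
// an any-extending load (e.g. from i16) is safe to treat as an i32 load
// only if it is at least 4-byte aligned and not volatile.
static bool matchesAsI32Load(const LoadInfo &LD) {
  if (LD.Ext == LoadInfo::NonExtLoad)
    return true;
  if (LD.Ext == LoadInfo::AnyExtLoad)
    return LD.AlignBytes >= 4 && !LD.Volatile;
  return false;
}

int main() {
  LoadInfo Plain   = {LoadInfo::NonExtLoad, 4, false};
  LoadInfo AnyExt  = {LoadInfo::AnyExtLoad, 4, false};
  LoadInfo Unalign = {LoadInfo::AnyExtLoad, 1, false};
  printf("%d %d %d\n", matchesAsI32Load(Plain),    // 1
                       matchesAsI32Load(AnyExt),   // 1
                       matchesAsI32Load(Unalign)); // 0
  return 0;
}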
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
@@ -502,6 +548,10 @@ def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
@@ -509,6 +559,10 @@ def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
// An 'and' node with a single use.
@@ -524,66 +578,10 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
return N->hasOneUse();
}]>;
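The "_su" (single use) fragments such as trunc_su refuse to match unless the node has exactly one use, so folding it into the consuming instruction never forces the value to be recomputed for other users. A minimal C++ sketch of that gate, with a toy Node type that is not an LLVM class:

#include <cstdio>

// Toy stand-in for a DAG node that tracks how many users consume its result;
// not an LLVM type.
struct Node {
  unsigned NumUses;
  bool hasOneUse() const { return NumUses == 1; }
};

// A fold is only allowed when nothing else still needs the original value;
// otherwise the folded computation would have to be materialized twice.
static bool okToFold(const Node &N) { return N.hasOneUse(); }

int main() {
  Node OnlyUser = {1}, Shared = {3};
  printf("%d %d\n", okToFold(OnlyUser), okToFold(Shared)); // 1 0
  return 0;
}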
-// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
-def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
- APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
- return (~KnownZero0 & ~KnownZero1) == 0;
-}]>;
-
//===----------------------------------------------------------------------===//
-// Instruction list...
+// Instruction list.
//
-// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
-// a stack adjustment and the codegen must know that they may modify the stack
-// pointer before prolog-epilog rewriting occurs.
-// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
-// sub / add which can clobber EFLAGS.
-let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start timm:$amt)]>,
- Requires<[In32BitMode]>;
-def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[In32BitMode]>;
-}
-
-// x86-64 va_start lowering magic.
-let usesCustomInserter = 1 in {
-def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
- (outs),
- (ins GR8:$al,
- i64imm:$regsavefi, i64imm:$offset,
- variable_ops),
- "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
- [(X86vastart_save_xmm_regs GR8:$al,
- imm:$regsavefi,
- imm:$offset)]>;
-
-// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls
-// to _alloca is needed to probe the stack when allocating more than 4k bytes in
-// one go. Touching the stack at 4K increments is necessary to ensure that the
-// guard pages used by the OS virtual memory manager are allocated in correct
-// sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
- def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
-}
-
// Nop
let neverHasSideEffects = 1 in {
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
@@ -593,206 +591,22 @@ let neverHasSideEffects = 1 in {
"nop{l}\t$zero", []>, TB;
}
-// Trap
-let Uses = [EFLAGS] in {
- def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-}
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
- [(int_x86_int (i8 3))]>;
-// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
- [(int_x86_int imm:$trap)]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
-
-// PIC base construction. This expands to code that looks like this:
-// call $next_inst
-// popl %destreg"
-let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
- def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
- "", []>;
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions.
-//
-
-// Return instructions.
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP in {
- def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
- "ret",
- [(X86retflag 0)]>;
- def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "ret\t$amt",
- [(X86retflag timm:$amt)]>;
- def LRET : I <0xCB, RawFrm, (outs), (ins),
- "lret", []>;
- def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", []>;
-}
-
-// Unconditional branches.
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
- def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)]>;
- def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", []>;
-}
-
-// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
- multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
- def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
- }
-}
-
-defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>;
-defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
-
-// FIXME: What about the CX/RCX versions of this instruction?
-let Uses = [ECX], isBranch = 1, isTerminator = 1 in
- def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", []>;
-
-
-// Indirect branches
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
- def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
- def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
-
- def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
-
- def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize;
- def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", []>;
-}
-
-
-// Loop instructions
-
-def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. ESP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i32imm_pcrel:$dst,variable_ops),
- "call\t$dst", []>;
- def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR32:$dst)]>;
- def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
-
- def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
- (ins i16imm:$off, i16imm:$seg),
- "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
- (ins i32imm:$off, i16imm:$seg),
- "lcall{l}\t{$seg, $off|$off, $seg}", []>;
-
- def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize;
- def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", []>;
-
- // callw for 16 bit code for the assembler.
- let isAsmParserOnly = 1 in
- def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
- (outs), (ins i16imm_pcrel:$dst, variable_ops),
- "callw\t$dst", []>, OpSize;
- }
// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>;
-def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", []>;
-
-// Tail call stuff.
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- isCodeGenOnly = 1 in
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
- def TCRETURNdi : I<0, Pseudo, (outs),
- (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- def TCRETURNri : I<0, Pseudo, (outs),
- (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
- let mayLoad = 1 in
- def TCRETURNmi : I<0, Pseudo, (outs),
- (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
- "#TC_RETURN $dst $offset", []>;
-
- // FIXME: The should be pseudo instructions that are lowered when going to
- // mcinst.
- def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL",
- []>;
- def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "", []>; // FIXME: Remove encoding when JIT is dead.
- let mayLoad = 1 in
- def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions...
-//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
(outs), (ins), "leave", []>, Requires<[In32BitMode]>;
-def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
-def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
-def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
@@ -805,6 +619,10 @@ def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>,
OpSize;
def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>;
+
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let mayStore = 1 in {
@@ -817,29 +635,54 @@ def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>,
OpSize;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>;
-}
-}
-let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", []>;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{w}\t$imm", []>, OpSize;
-def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", []>;
-}
-let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
- Requires<[In32BitMode]>;
-}
-let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
Requires<[In32BitMode]>;
+
+}
+}
+
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in {
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>;
+}
+let mayStore = 1 in {
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
+}
}
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
+
+
+
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
mayLoad=1, neverHasSideEffects=1 in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
@@ -851,12 +694,16 @@ def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
Requires<[In32BitMode]>;
}
-let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
- def BSWAP32r : I<0xC8, AddRegFrm,
- (outs GR32:$dst), (ins GR32:$src),
- "bswap{l}\t$dst",
- [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst"
// Bit scan instructions.
let Defs = [EFLAGS] in {
@@ -873,6 +720,12 @@ def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
@@ -887,44 +740,23 @@ def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
-let neverHasSideEffects = 1 in
-def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
-let isReMaterializable = 1 in
-def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins i32mem:$src),
- "lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
-
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
-}
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
}
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
-
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
@@ -932,91 +764,24 @@ let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>;
def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize;
def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>;
-
-let Defs = [RAX, RDX] in
-def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
- TB;
-
-let Defs = [RAX, RCX, RDX] in
-def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
-def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
-}
-
-def SYSCALL : I<0x05, RawFrm,
- (outs), (ins), "syscall", []>, TB;
-def SYSRET : I<0x07, RawFrm,
- (outs), (ins), "sysret", []>, TB;
-def SYSENTER : I<0x34, RawFrm,
- (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
-
-def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
//===----------------------------------------------------------------------===//
-// Input/Output Instructions...
+// Move Instructions.
//
-let Defs = [AL], Uses = [DX] in
-def IN8rr : I<0xEC, RawFrm, (outs), (ins),
- "in{b}\t{%dx, %al|%AL, %DX}", []>;
-let Defs = [AX], Uses = [DX] in
-def IN16rr : I<0xED, RawFrm, (outs), (ins),
- "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
-let Defs = [EAX], Uses = [DX] in
-def IN32rr : I<0xED, RawFrm, (outs), (ins),
- "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
-
-let Defs = [AL] in
-def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
- "in{b}\t{$port, %al|%AL, $port}", []>;
-let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
-let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
- "in{l}\t{$port, %eax|%EAX, $port}", []>;
-
-let Uses = [DX, AL] in
-def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
- "out{b}\t{%al, %dx|%DX, %AL}", []>;
-let Uses = [DX, AX] in
-def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
- "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
-let Uses = [DX, EAX] in
-def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
- "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
-
-let Uses = [AL] in
-def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
- "out{b}\t{%al, $port|$port, %AL}", []>;
-let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
-let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
- "out{l}\t{%eax, $port|$port, %EAX}", []>;
-
-def IN8 : I<0x6C, RawFrm, (outs), (ins),
- "ins{b}", []>;
-def IN16 : I<0x6D, RawFrm, (outs), (ins),
- "ins{w}", []>, OpSize;
-def IN32 : I<0x6D, RawFrm, (outs), (ins),
- "ins{l}", []>;
-//===----------------------------------------------------------------------===//
-// Move Instructions...
-//
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
@@ -1024,6 +789,8 @@ def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
@@ -1035,6 +802,12 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, imm:$src)]>;
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
}
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
@@ -1046,6 +819,9 @@ def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
@@ -1067,24 +843,22 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>,
Requires<[In32BitMode]>;
-
-// Moves to and from segment registers
-def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
+def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
+ "mov{q}\t{$src, %rax|%rax, $src}", []>;
+def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
+ "mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
+
let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1093,6 +867,8 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
@@ -1105,6 +881,9 @@ def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (loadi32 addr:$src))]>;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
}
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
@@ -1116,24 +895,9 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store GR32:$src, addr:$dst)]>;
-
-/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
-let isCodeGenOnly = 1 in {
-let neverHasSideEffects = 1 in
-def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>;
-
-let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-
-let mayStore = 1 in
-def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- []>;
-}
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1154,2219 +918,6 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
}
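The _NOREX register and memory classes exist because of an x86-64 encoding rule: once any REX prefix is present, the 8-bit register encodings 4-7 select spl/bpl/sil/dil instead of the high-byte registers ah/ch/dh/bh, so moves that must handle an h register are restricted to operands that never require REX. A short C++ illustration of the two register tables (architectural facts, not LLVM data structures):

#include <cstdio>

// The same 3-bit encodings 4-7 name different 8-bit registers depending on
// whether any REX prefix is present on the instruction.
static const char *Reg8NoREX[8] = {"al","cl","dl","bl","ah","ch","dh","bh"};
static const char *Reg8REX[8]   = {"al","cl","dl","bl","spl","bpl","sil","dil"};

int main() {
  for (unsigned Enc = 4; Enc < 8; ++Enc)
    printf("enc %u: %s without REX, %s with REX\n",
           Enc, Reg8NoREX[Enc], Reg8REX[Enc]);
  // ah/ch/dh/bh therefore cannot appear in an instruction that needs a REX
  // prefix, which is why the moves above use GR8_NOREX and i8mem_NOREX.
  return 0;
}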
-// Moves to and from debug registers
-def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-//===----------------------------------------------------------------------===//
-// Fixed-Register Multiplication and Division Instructions...
-//
-
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
-
-let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*GR16
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*GR32
-
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
- "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src",
- []>; // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
- // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>;
-let mayLoad = 1, mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- // EDX:EAX/[mem32] = EAX,EDX
- "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-//
-let Constraints = "$src1 = $dst" in {
-
-// Conditional moves
-let Uses = [EFLAGS] in {
-
-let Predicates = [HasCMov] in {
-let isCommutable = 1 in {
-def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // isCommutable = 1
-
-def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB, OpSize;
-def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_B, EFLAGS))]>,
- TB;
-def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovae{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB, OpSize;
-def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovae{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_AE, EFLAGS))]>,
- TB;
-def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmove{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB, OpSize;
-def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmove{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_E, EFLAGS))]>,
- TB;
-def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovne{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB, OpSize;
-def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovne{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NE, EFLAGS))]>,
- TB;
-def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovbe{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB, OpSize;
-def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovbe{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_BE, EFLAGS))]>,
- TB;
-def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmova{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB, OpSize;
-def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmova{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_A, EFLAGS))]>,
- TB;
-def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB, OpSize;
-def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_L, EFLAGS))]>,
- TB;
-def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovge{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB, OpSize;
-def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovge{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_GE, EFLAGS))]>,
- TB;
-def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovle{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB, OpSize;
-def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovle{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_LE, EFLAGS))]>,
- TB;
-def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovg{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB, OpSize;
-def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovg{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_G, EFLAGS))]>,
- TB;
-def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovs{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB, OpSize;
-def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovs{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_S, EFLAGS))]>,
- TB;
-def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovns{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB, OpSize;
-def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovns{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NS, EFLAGS))]>,
- TB;
-def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB, OpSize;
-def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_P, EFLAGS))]>,
- TB;
-def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovnp{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB, OpSize;
-def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovnp{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
- TB;
-def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovo{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB, OpSize;
-def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovo{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_O, EFLAGS))]>,
- TB;
-def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "cmovno{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB, OpSize;
-def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovno{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NO, EFLAGS))]>,
- TB;
-} // Predicates = [HasCMov]
-
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
-def CMOV_GR8 : I<0, Pseudo,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
- "#CMOV_GR8 PSEUDO!",
- [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
- imm:$cond, EFLAGS))]>;
-
-let Predicates = [NoCMov] in {
-def CMOV_GR32 : I<0, Pseudo,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
- "#CMOV_GR32* PSEUDO!",
- [(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_GR16 : I<0, Pseudo,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
- "#CMOV_GR16* PSEUDO!",
- [(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
-def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst),
- (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
- "#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst,
- (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst),
- (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
- "#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst,
- (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
- EFLAGS))]>;
-def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst),
- (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
- "#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst,
- (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
- EFLAGS))]>;
-} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
-} // Uses = [EFLAGS]
-
-
-// unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
- "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>;
-
-let Constraints = "" in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
- "neg{b}\t$dst",
- [(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
- "neg{w}\t$dst",
- [(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
- "neg{l}\t$dst",
- [(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Match xor -1 to not. Favors these over a move imm + xor to save code size.
-let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
- "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>;
-}
-let Constraints = "" in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
- "not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
- "not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
- "not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-} // Constraints = ""
-} // CodeSize
-
-// TODO: inc/dec is slow for P4, but fast for Pentium-M.
-let Defs = [EFLAGS] in {
-let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
-
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-}
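-// The one-byte 0x40+r / 0x48+r inc/dec encodings are reused as REX prefixes in
-// 64-bit mode, which is why these forms require In32BitMode.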
-let Constraints = "", CodeSize = 2 in {
- def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
- [(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
- def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-
-let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
- OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
- Requires<[In32BitMode]>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 1
-
-let Constraints = "", CodeSize = 2 in {
- def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
- [(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
- def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In32BitMode]>;
- def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In32BitMode]>;
-} // Constraints = "", CodeSize = 2
-} // Defs = [EFLAGS]
-
-// Logical operators...
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
-def AND8rr : I<0x20, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
-def AND16rr : I<0x21, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- GR32:$src2))]>;
-}
-
-// AND instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}", []>;
-def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def AND8rm : I<0x22, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- (loadi8 addr:$src2)))]>;
-def AND16rm : I<0x23, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- (loadi16 addr:$src2)))]>,
- OpSize;
-def AND32rm : I<0x23, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- (loadi32 addr:$src2)))]>;
-
-def AND8ri : Ii8<0x80, MRM4r,
- (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
- imm:$src2))]>;
-def AND16ri : Ii16<0x81, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def AND32ri : Ii32<0x81, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- imm:$src2))]>;
-def AND16ri8 : Ii8<0x83, MRM4r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def AND32ri8 : Ii8<0x83, MRM4r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def AND8mr : I<0x20, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mr : I<0x21, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mr : I<0x21, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND8mi : Ii8<0x80, MRM4m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "and{b}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi : Ii16<0x81, MRM4m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi : Ii32<0x81, MRM4m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def AND16mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "and{w}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def AND32mi8 : Ii8<0x83, MRM4m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "and{l}\t{$src, $dst|$dst, $src}",
- [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src),
- "and{b}\t{$src, %al|%al, $src}", []>;
- def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src),
- "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
- "and{l}\t{$src, %eax|%eax, $src}", []>;
-
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
-def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
-def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
- OpSize;
-def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
-}
-
-// OR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}", []>;
-def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
- (load addr:$src2)))]>;
-def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
- (ins GR8 :$src1, i8imm:$src2),
- "or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
-def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- imm:$src2))]>;
-
-def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
- i16immSExt8:$src2))]>, OpSize;
-def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-let Constraints = "" in {
- def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "or{b}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
- "or{w}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
- "or{l}\t{$src, $dst|$dst, $src}",
- [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src),
- "or{b}\t{$src, %al|%al, $src}", []>;
- def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src),
- "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
- "or{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
- def XOR8rr : I<0x30, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- GR8:$src2))]>;
- def XOR16rr : I<0x31, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
- def XOR32rr : I<0x31, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- GR32:$src2))]>;
-} // isCommutable = 1
-
-// XOR instructions with the destination register in REG and the source register
-// in R/M. Included for the disassembler.
-let isCodeGenOnly = 1 in {
-def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def XOR8rm : I<0x32, MRMSrcMem,
- (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
- (load addr:$src2)))]>;
-def XOR16rm : I<0x33, MRMSrcMem,
- (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- (load addr:$src2)))]>,
- OpSize;
-def XOR32rm : I<0x33, MRMSrcMem,
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-def XOR8ri : Ii8<0x80, MRM6r,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
-def XOR16ri : Ii16<0x81, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- imm:$src2))]>, OpSize;
-def XOR32ri : Ii32<0x81, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- imm:$src2))]>;
-def XOR16ri8 : Ii8<0x83, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
- i16immSExt8:$src2))]>,
- OpSize;
-def XOR32ri8 : Ii8<0x83, MRM6r,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
- i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def XOR8mr : I<0x30, MRMDestMem,
- (outs), (ins i8mem :$dst, GR8 :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mr : I<0x31, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mr : I<0x31, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR8mi : Ii8<0x80, MRM6m,
- (outs), (ins i8mem :$dst, i8imm :$src),
- "xor{b}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi : Ii16<0x81, MRM6m,
- (outs), (ins i16mem:$dst, i16imm:$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi : Ii32<0x81, MRM6m,
- (outs), (ins i32mem:$dst, i32imm:$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
- (implicit EFLAGS)]>;
- def XOR16mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i16mem:$dst, i16i8imm :$src),
- "xor{w}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize;
- def XOR32mi8 : Ii8<0x83, MRM6m,
- (outs), (ins i32mem:$dst, i32i8imm :$src),
- "xor{l}\t{$src, $dst|$dst, $src}",
- [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
- (implicit EFLAGS)]>;
-
- def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
- "xor{b}\t{$src, %al|%al, $src}", []>;
- def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
- "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
- "xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-// Shift instructions
-let Defs = [EFLAGS] in {
-let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src1, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src1, CL))]>;
-} // Uses = [CL]
-
-def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "shl{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
-
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper.
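-// (A left shift by one is the value added to itself, so the ADD patterns
-// already cover that case.)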
-
-def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
- "shl{b}\t$dst", []>;
-def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t$dst", []>, OpSize;
-def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t$dst", []>;
-
-} // isConvertibleToThreeAddress = 1
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shl{b}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shl{w}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shl{l}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src1, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src1, CL))]>;
-}
-
-def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "shr{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
-def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "shr{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
-def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "shr{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
- "shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
-def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
-def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
- OpSize;
- def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
- "shr{b}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
- "shr{w}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
- "shr{l}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
- def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src1, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src1, CL))]>;
-}
-
-def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "sar{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
-def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "sar{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "sar{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
- def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
- "sar{b}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
- "sar{w}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
- "sar{l}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Shift by 1
- def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
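-// (With a count of 1, the 0xD0/0xD1 forms save the imm8 byte over the
-// 0xC0/0xC1 forms used here.)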
-let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
- "rol{b}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
- "rol{w}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
- "rol{l}\t{$src, $dst|$dst, $src}",
- [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
- "ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
- "ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
- OpSize;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
- "ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
- def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
- def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
- }
- def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
- "ror{b}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
- def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
- "ror{w}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
- OpSize;
- def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
- "ror{l}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
- // Rotate by 1
- def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
- def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize;
- def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
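-// shld shifts $src1 left and fills the vacated low bits from the high bits of
-// $src2; shrd shifts right and fills the vacated high bits from the low bits.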
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
- TB, OpSize;
-}
-
-let isCommutable = 1 in { // These instructions commute to each other.
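-// (shld a, b, n computes the same value as shrd b, a, size-n for a nonzero
-// count, so the operands commute by switching opcodes and adjusting the count.)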
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
- TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
- TB, OpSize;
-}
-
-let Constraints = "" in {
- let Uses = [CL] in {
- def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
- }
- def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
- def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB;
-
- let Uses = [CL] in {
- def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
- }
- def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
- def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
- TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- GR16:$src2))]>, OpSize;
-def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
-
-// These are alternate spellings for use by the disassembler; we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}", []>;
- def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
- def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
- (load addr:$src2)))]>;
-def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
- (load addr:$src2)))]>, OpSize;
-def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
- (load addr:$src2)))]>;
-
-// Register-Integer Addition
-def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
- // Memory-Register Addition
- def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
- // addition to rAX
- def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
- "add{b}\t{$src, %al|%al, $src}", []>;
- def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
- "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
- "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
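-// adde is the add-with-carry node; Uses = [EFLAGS] models reading the carry
-// flag set by a preceding instruction.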
-let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
- (ins GR8:$src1, i8mem:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "adc{b}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "adc{w}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
- "adc{b}\t{$src, %al|%al, $src}", []>;
- def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
- "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
- "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
- (ins GR8 :$src1, i8mem :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
- (ins GR8:$src1, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, EFLAGS,
- (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
- // Memory-Register Subtraction
- def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
- (implicit EFLAGS)]>;
-
- // Memory-Integer Subtraction
- def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
- (implicit EFLAGS)]>;
- def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i16immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>, OpSize;
- def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i32immSExt8:$src2),
- addr:$dst),
- (implicit EFLAGS)]>;
-
- def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
- "sub{b}\t{$src, %al|%al, $src}", []>;
- def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
- "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
- "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
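-// sube is the subtract-with-borrow node; the borrow is read from the carry
-// flag in EFLAGS.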
-def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
- (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
- def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
- def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
- def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
- def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
- def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
- def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
- "sbb{b}\t{$src, %al|%al, $src}", []>;
- def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
- "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
- "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
- OpSize;
-def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "sbb{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
- (ins GR16:$src1, i16i8imm:$src2),
- "sbb{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
- (ins GR32:$src1, i32i8imm:$src2),
- "sbb{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
- TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Surprisingly enough, these are not two address instructions!
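// A minimal illustration (not part of this file): the immediate forms below
// take three operands, so the destination is not tied to a source:
//   imull $10, %ecx, %eax     # EAX = ECX * 10
// whereas the two-address register form reads and writes the same register:
//   imull %ecx, %eax          # EAX = EAX * ECX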
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
- (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>,
- OpSize;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
- (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
- (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
- (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1),
- i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
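// For illustration only (the branch label is hypothetical): TEST performs the
// same AND and sets the same flags, but discards the result:
//   testl %eax, %eax          # ZF/SF/PF from EAX & EAX, EAX unchanged
//   je    .LBB0_1             # taken when EAX == 0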
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
-def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
- 0))]>,
- OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
- 0))]>;
-}
-
-def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
- "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
- "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
- "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
- 0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR16:$src1,
- (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and GR32:$src1,
- (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
- (outs), (ins GR8:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
- (outs), (ins GR16:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
- OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
- (outs), (ins GR32:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
- (outs), (ins i8mem:$src1, i8imm:$src2),
- "test{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
- 0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "test{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
- 0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "test{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
- 0))]>;
-} // Defs = [EFLAGS]
-
// Condition code ops, incl. set if equal/not equal/...
let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3374,305 +925,10 @@ def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
- OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
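// Sketch of the idiom these pseudos model (not from the original source):
// subtracting a register from itself with borrow leaves only the carry flag
// behind, producing an all-zeros or all-ones mask:
//   cmpl %ecx, %eax           # CF = 1 when EAX < ECX (unsigned)
//   sbbl %eax, %eax           # EAX = 0 - CF, i.e. 0 or 0xFFFFFFFF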
-
-def SETEr : I<0x94, MRM0r,
- (outs GR8 :$dst), (ins),
- "sete\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
- TB; // GR8 = ==
-def SETEm : I<0x94, MRM0m,
- (outs), (ins i8mem:$dst),
- "sete\t$dst",
- [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = ==
-
-def SETNEr : I<0x95, MRM0r,
- (outs GR8 :$dst), (ins),
- "setne\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
- TB; // GR8 = !=
-def SETNEm : I<0x95, MRM0m,
- (outs), (ins i8mem:$dst),
- "setne\t$dst",
- [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !=
-
-def SETLr : I<0x9C, MRM0r,
- (outs GR8 :$dst), (ins),
- "setl\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
- TB; // GR8 = < signed
-def SETLm : I<0x9C, MRM0m,
- (outs), (ins i8mem:$dst),
- "setl\t$dst",
- [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < signed
-
-def SETGEr : I<0x9D, MRM0r,
- (outs GR8 :$dst), (ins),
- "setge\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
- TB; // GR8 = >= signed
-def SETGEm : I<0x9D, MRM0m,
- (outs), (ins i8mem:$dst),
- "setge\t$dst",
- [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= signed
-
-def SETLEr : I<0x9E, MRM0r,
- (outs GR8 :$dst), (ins),
- "setle\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
- TB; // GR8 = <= signed
-def SETLEm : I<0x9E, MRM0m,
- (outs), (ins i8mem:$dst),
- "setle\t$dst",
- [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= signed
-
-def SETGr : I<0x9F, MRM0r,
- (outs GR8 :$dst), (ins),
- "setg\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
- TB; // GR8 = > signed
-def SETGm : I<0x9F, MRM0m,
- (outs), (ins i8mem:$dst),
- "setg\t$dst",
- [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > signed
-
-def SETBr : I<0x92, MRM0r,
- (outs GR8 :$dst), (ins),
- "setb\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
- TB; // GR8 = < unsign
-def SETBm : I<0x92, MRM0m,
- (outs), (ins i8mem:$dst),
- "setb\t$dst",
- [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = < unsign
-
-def SETAEr : I<0x93, MRM0r,
- (outs GR8 :$dst), (ins),
- "setae\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
- TB; // GR8 = >= unsign
-def SETAEm : I<0x93, MRM0m,
- (outs), (ins i8mem:$dst),
- "setae\t$dst",
- [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = >= unsign
-
-def SETBEr : I<0x96, MRM0r,
- (outs GR8 :$dst), (ins),
- "setbe\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
- TB; // GR8 = <= unsign
-def SETBEm : I<0x96, MRM0m,
- (outs), (ins i8mem:$dst),
- "setbe\t$dst",
- [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <= unsign
-
-def SETAr : I<0x97, MRM0r,
- (outs GR8 :$dst), (ins),
- "seta\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
- TB; // GR8 = > unsigned
-def SETAm : I<0x97, MRM0m,
- (outs), (ins i8mem:$dst),
- "seta\t$dst",
- [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = > unsigned
-
-def SETSr : I<0x98, MRM0r,
- (outs GR8 :$dst), (ins),
- "sets\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
- TB; // GR8 = <sign bit>
-def SETSm : I<0x98, MRM0m,
- (outs), (ins i8mem:$dst),
- "sets\t$dst",
- [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = <sign bit>
-def SETNSr : I<0x99, MRM0r,
- (outs GR8 :$dst), (ins),
- "setns\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
- TB; // GR8 = !<sign bit>
-def SETNSm : I<0x99, MRM0m,
- (outs), (ins i8mem:$dst),
- "setns\t$dst",
- [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = !<sign bit>
-
-def SETPr : I<0x9A, MRM0r,
- (outs GR8 :$dst), (ins),
- "setp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
- TB; // GR8 = parity
-def SETPm : I<0x9A, MRM0m,
- (outs), (ins i8mem:$dst),
- "setp\t$dst",
- [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = parity
-def SETNPr : I<0x9B, MRM0r,
- (outs GR8 :$dst), (ins),
- "setnp\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
- TB; // GR8 = not parity
-def SETNPm : I<0x9B, MRM0m,
- (outs), (ins i8mem:$dst),
- "setnp\t$dst",
- [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not parity
-
-def SETOr : I<0x90, MRM0r,
- (outs GR8 :$dst), (ins),
- "seto\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
- TB; // GR8 = overflow
-def SETOm : I<0x90, MRM0m,
- (outs), (ins i8mem:$dst),
- "seto\t$dst",
- [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = overflow
-def SETNOr : I<0x91, MRM0r,
- (outs GR8 :$dst), (ins),
- "setno\t$dst",
- [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
- TB; // GR8 = not overflow
-def SETNOm : I<0x91, MRM0m,
- (outs), (ins i8mem:$dst),
- "setno\t$dst",
- [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
- TB; // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
- "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
- "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
- "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr : I<0x38, MRMDestReg,
- (outs), (ins GR8 :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
- (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
- (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr : I<0x38, MRMDestMem,
- (outs), (ins i8mem :$src1, GR8 :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
- (outs), (ins i16mem:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
- OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
- (outs), (ins i32mem:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm : I<0x3A, MRMSrcMem,
- (outs), (ins GR8 :$src1, i8mem :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR16:$src1, i16mem:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
- OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
- (outs), (ins GR32:$src1, i32mem:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
- def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
- def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
- def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
-def CMP8ri : Ii8<0x80, MRM7r,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
- (outs), (ins GR32:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi : Ii8 <0x80, MRM7m,
- (outs), (ins i8mem :$src1, i8imm :$src2),
- "cmp{b}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
- (outs), (ins i16mem:$src1, i16imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
- OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
- (outs), (ins i32mem:$src1, i32imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR16:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i16mem:$src1, i16i8imm:$src2),
- "cmp{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
- (outs), (ins i32mem:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
- i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
- (outs), (ins GR32:$src1, i32i8imm:$src2),
- "cmp{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit test instructions: BT, BTS, BTR, BTC.
-// Bit tests.
-// TODO: BTC, BTR, and BTS
let Defs = [EFLAGS] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3680,6 +936,9 @@ def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
@@ -3687,17 +946,23 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
// only for now.
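// Illustrative example, assuming ECX holds 100 (not part of this file):
//   btl %ecx, (%edi)          # tests bit 4 of the dword at 12(%edi)
//   btl %ecx, %eax            # register form: tests bit 100 & 31 = 4 of EAX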
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
// (implicit EFLAGS)]
[]
>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi32 addr:$src1), GR32:$src2),
// (implicit EFLAGS)]
[]
>, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]
+ []
+ >, TB;
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3706,6 +971,10 @@ def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -3717,307 +986,129 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ i64immSExt8:$src2))]>, TB;
+
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // Defs = [EFLAGS]
-// Sign/Zero extenders
-// Use movsbl instead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
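// Example encodings for comparison (illustrative, not from this file):
//   movsbw %al, %cx           # 66 0F BE C8 (4 bytes), upper half of ECX stays live
//   movsbl %al, %ecx          # 0F BE C8    (3 bytes), defines all of ECX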
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl instead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
- (outs GR32_NOREX:$dst), (ins GR8:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
- (outs GR32_NOREX:$dst), (ins i8mem:$src),
- "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- []>, TB;
-
-let neverHasSideEffects = 1 in {
- let Defs = [AX], Uses = [AL] in
- def CBW : I<0x98, RawFrm, (outs), (ins),
- "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
- let Defs = [EAX], Uses = [AX] in
- def CWDE : I<0x98, RawFrm, (outs), (ins),
- "{cwtl|cwde}", []>; // EAX = signext(AX)
-
- let Defs = [AX,DX], Uses = [AX] in
- def CWD : I<0x99, RawFrm, (outs), (ins),
- "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
- let Defs = [EAX,EDX], Uses = [EAX] in
- def CDQ : I<0x99, RawFrm, (outs), (ins),
- "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isCodeGenOnly = 1 in {
-def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
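// Illustrative encodings (not part of this file):
//   xorw %ax, %ax             # 66 31 C0 (3 bytes), partial write of EAX
//   xorl %eax, %eax           # 31 C0    (2 bytes), defines the full register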
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
- "",
- [(set GR16:$dst, 0)]>, OpSize;
-
-// FIXME: Set encoding to pseudo.
-def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. ESP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "leal\t$sym, %eax; "
- "call\t___tls_get_addr@PLT",
- [(X86tlsaddr tls32addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack; on return, the
-// address of the variable is in %eax. %ecx is trashed during the function
-// call. All other registers are preserved.
-let Defs = [EAX, ECX],
- Uses = [ESP],
- usesCustomInserter = 1 in
-def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
- "# TLSCall_32",
- [(X86TLSCall addr:$sym)]>,
- Requires<[In32BitMode]>;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%gs:$src, $dst",
- [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
-
-let AddedComplexity = 5, isCodeGenOnly = 1 in
-def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movl\t%fs:$src, $dst",
- [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
-
-//===----------------------------------------------------------------------===//
-// EH Pseudo Instructions
-//
-let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, isCodeGenOnly = 1 in {
-def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
- "ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
-
-}
//===----------------------------------------------------------------------===//
// Atomic support
//
-// Memory barriers
-
-// TODO: Get this to fold the constant into the instruction.
-def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
- "lock\n\t"
- "or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
-
-let hasSideEffects = 1 in {
-def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
- "#MEMBARRIER",
- [(X86MemBarrier)]>, Requires<[HasSSE2]>;
-}
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
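// A small illustration (not from the original source): xchg with a memory
// operand is locked implicitly, so no explicit lock prefix is needed:
//   xchgl %eax, (%edi)        # atomic swap of EAX with the dword at (%edi)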
let Constraints = "$val = $dst" in {
-def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- "xchg{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
-def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- "xchg{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
- OpSize;
def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
[(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr),
+ "xchg{q}\t{$val, $ptr|$ptr, $val}",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
- "xchg{l}\t{$val, $src|$src, $val}", []>;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
- "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src),
"xchg{b}\t{$val, $src|$src, $val}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src),
+ "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src),
+ "xchg{l}\t{$val, $src|$src, $val}", []>;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
+ "xchg{q}\t{$val, $src|$src, $val}", []>;
}
def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src),
"xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src),
"xchg{l}\t{$src, %eax|%eax, $src}", []>;
+def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|%rax, $src}", []>;
-// Atomic compare and swap.
-let Defs = [EAX, EFLAGS], Uses = [EAX] in {
-def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
- "lock\n\t"
- "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
-}
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
- "lock\n\t"
- "cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
-}
-
-let Defs = [AX, EFLAGS], Uses = [AX] in {
-def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
- "lock\n\t"
- "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
-}
-let Defs = [AL, EFLAGS], Uses = [AL] in {
-def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
- "lock\n\t"
- "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
-}
-// Atomic exchange and add
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
-def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
- "lock\n\t"
- "xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
- TB, LOCK;
-def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
- "lock\n\t"
- "xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
- TB, OpSize, LOCK;
-def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
- "lock\n\t"
- "xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
- TB, LOCK;
-}
def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -4025,6 +1116,8 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4033,6 +1126,9 @@ def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
@@ -4041,6 +1137,8 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
@@ -4049,284 +1147,29 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
"cmpxchg8b\t$dst", []>, TB;
-// Optimized codegen when the non-memory output is not used.
-// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
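// Sketch of the intended use (illustrative only): when the loaded value is
// unused, a locked read-modify-write is sufficient and no result register
// is produced:
//   lock addl $1, (%edi)      # atomic increment of the dword at (%edi)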
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
-def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
- "lock\n\t"
- "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
-def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
-
-def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
- "lock\n\t"
- "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
- "lock\n\t"
- "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
-def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
- "lock\n\t"
- "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
-
-def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
- "lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
-def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
- "lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
-def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
- "lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
-}
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>, TB;
-// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
-def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
-def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
-def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMXOR32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
-def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMNAND32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
-def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "#ATOMMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
-def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMIN32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
-def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMAX32 PSEUDO!",
- [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
-
-def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
-def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
-def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMXOR16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
-def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMNAND16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
-def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
- "#ATOMMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
-def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMIN16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
-def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
- "#ATOMUMAX16 PSEUDO!",
- [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
-
-def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
-def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
-def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMXOR8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
-def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
- "#ATOMNAND8 PSEUDO!",
- [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
-}
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
- Defs = [EFLAGS, EAX, EBX, ECX, EDX],
- Uses = [EAX, EBX, ECX, EDX],
- mayLoad = 1, mayStore = 1,
- usesCustomInserter = 1 in {
-def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMAND6432 PSEUDO!", []>;
-def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMOR6432 PSEUDO!", []>;
-def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMXOR6432 PSEUDO!", []>;
-def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMNAND6432 PSEUDO!", []>;
-def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMADD6432 PSEUDO!", []>;
-def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSUB6432 PSEUDO!", []>;
-def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- "#ATOMSWAP6432 PSEUDO!", []>;
-}
-// Segmentation support instructions.
-
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-
-// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t{$src}", []>, TB;
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t{$src}", []>, TB;
-
-def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
- "push{w}\t%fs", []>, OpSize, TB;
-def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
- "push{l}\t%fs", []>, TB;
-def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
- "push{w}\t%gs", []>, OpSize, TB;
-def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
- "push{l}\t%gs", []>, TB;
-
-def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
- "pop{w}\t%fs", []>, OpSize, TB;
-def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
- "pop{l}\t%fs", []>, TB;
-def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
- "pop{w}\t%gs", []>, OpSize, TB;
-def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
- "pop{l}\t%gs", []>, TB;
-
-def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lds{l}\t{$src, $dst|$dst, $src}", []>;
-def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "les{l}\t{$src, $dst|$dst, $src}", []>;
-def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
- "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
- "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
-
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
- "verr\t$seg", []>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", []>, TB;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
- "verw\t$seg", []>, TB;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
- "verw\t$seg", []>, TB;
-
-// Descriptor-table support instructions
-
-def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
- "sgdt\t$dst", []>, TB;
-def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
- "sidt\t$dst", []>, TB;
-def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
- "sldt{w}\t$dst", []>, TB;
-def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
- "lgdt\t$src", []>, TB;
-def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
- "lidt\t$src", []>, TB;
-def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
- "lldt{w}\t$src", []>, TB;
-def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
- "lldt{w}\t$src", []>, TB;
-
// Lock instruction prefix
def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>;
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+
// Repeat string operation instruction prefixes
// These use the DF flag in the EFLAGS register to inc or dec ECX
let Defs = [ECX], Uses = [ECX,EFLAGS] in {
@@ -4336,35 +1179,19 @@ def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
}
-// Segment override instruction prefixes
-def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
-def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
-def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
-def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
-def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
-def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// String manipulation instructions
-
def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize;
def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>;
+def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>;
def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>;
def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize;
def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>;
-// CPU flow control instructions
-
-def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
-def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
-
-// FPU control instructions
-
-def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
// Flag instructions
-
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
@@ -4376,620 +1203,423 @@ def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
// Table lookup instructions
-
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>;
-// Specialized register support
-
-def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
-def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
-
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
- "smsw{w}\t$dst", []>, OpSize, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
- "smsw{l}\t$dst", []>, TB;
-// For memory operands, there is only a 16-bit form
-def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
- "smsw{w}\t$dst", []>, TB;
-
-def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
- "lmsw{w}\t$src", []>, TB;
-def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
- "lmsw{w}\t$src", []>, TB;
-
-def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
-
-// Cache instructions
-
-def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
-def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
-
-// VMX instructions
-
-// 66 0F 38 80
-def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
-// 66 0F 38 81
-def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
-// 0F 01 C1
-def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
-def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmclear\t$vmcs", []>, OpSize, TB;
-// 0F 01 C2
-def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
-// 0F 01 C3
-def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
-def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
- "vmptrld\t$vmcs", []>, TB;
-def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
- "vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
-// 0F 01 C4
-def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
-def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t{$vmxon}", []>, XS;
+// ASCII Adjust After Addition
+// sets AL, AH, and the CF and AF flags of EFLAGS; uses AL and the AF flag of EFLAGS
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>;
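// Worked example (illustrative; assumes AH starts at 0):
//   movb $9, %al
//   addb $6, %al              # AL = 0x0F
//   aaa                       # AL = 0x05, AH = 0x01, AF = CF = 1 (unpacked 15)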
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
+// ASCII Adjust AX Before Division
+// sets AL, AH and EFLAGS and uses AL and AH
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[In32BitMode]>;
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
- (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
- (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
- (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
- (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
- (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// Calls
-// tailcall stuff
-def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
- (TCRETURNri GR32_TC:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// FIXME: This is disabled for 32-bit PIC mode because the global base
-// register which is part of the address mode may be assigned a
-// callee-saved register.
-def : Pat<(X86tcret (load addr:$dst), imm:$off),
- (TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode, IsNotPIC]>;
-
-def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
- (TCRETURNdi texternalsym:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
-
-// Normal calls, with various flavors of addresses.
-def : Pat<(X86call (i32 tglobaladdr:$dst)),
- (CALLpcrel32 tglobaladdr:$dst)>;
-def : Pat<(X86call (i32 texternalsym:$dst)),
- (CALLpcrel32 texternalsym:$dst)>;
-def : Pat<(X86call (i32 imm:$dst)),
- (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
-
-// X86 specific add which produces a flag.
-def : Pat<(addc GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(addc GR32:$src1, (load addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(addc GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-def : Pat<(subc GR32:$src1, GR32:$src2),
- (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(subc GR32:$src1, (load addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(subc GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// Comparisons.
-
-// TEST R,R is smaller than CMP R,0
-def : Pat<(X86cmp GR8:$src1, 0),
- (TEST8rr GR8:$src1, GR8:$src1)>;
-def : Pat<(X86cmp GR16:$src1, 0),
- (TEST16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86cmp GR32:$src1, 0),
- (TEST32rr GR32:$src1, GR32:$src1)>;
-
-// Conditional moves with folded loads with operands swapped and conditions
-// inverted.
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
- (CMOVAE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
- (CMOVAE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
- (CMOVB16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
- (CMOVB32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
- (CMOVNE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
- (CMOVNE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
- (CMOVE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
- (CMOVE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
- (CMOVA16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
- (CMOVA32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
- (CMOVBE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
- (CMOVBE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
- (CMOVGE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
- (CMOVGE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
- (CMOVL16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
- (CMOVL32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
- (CMOVG16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
- (CMOVG32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
- (CMOVLE16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
- (CMOVLE32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
- (CMOVNP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
- (CMOVNP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
- (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
- (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
- (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
- (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
- (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
- (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
- (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
- (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
- (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
- (CMOVO32rm GR32:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-
-// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[In32BitMode]>;
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
-def : Pat<(add GR16:$src1, 128),
- (SUB16ri8 GR16:$src1, -128)>;
-def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
- (SUB16mi8 addr:$dst, -128)>;
-def : Pat<(add GR32:$src1, 128),
- (SUB32ri8 GR32:$src1, -128)>;
-def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
- (SUB32mi8 addr:$dst, -128)>;
-
-// r & (2^16-1) ==> movz
-def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit))>,
- Requires<[In32BitMode]>;
-
-// trunc patterns
-def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit)>,
- Requires<[In32BitMode]>;
-
-// h-register tricks
-def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- sub_8bit_hi)>,
- Requires<[In32BitMode]>;
-def : Pat<(srl GR16:$src, (i8 8)),
- (EXTRACT_SUBREG
- (MOVZX32rr8
- (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- sub_8bit_hi)),
- sub_16bit)>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
- (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
- GR16_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
- GR32_ABCD)),
- sub_8bit_hi))>,
- Requires<[In32BitMode]>;
-
-// (shl x, 1) ==> (add x, x)
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-
-// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
- (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
- (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
- (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHL32mCL addr:$dst)>;
-
-def : Pat<(srl GR8:$src1, (and CL, 31)),
- (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
- (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
- (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SHR32mCL addr:$dst)>;
-
-def : Pat<(sra GR8:$src1, (and CL, 31)),
- (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
- (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
- (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
- (SAR32mCL addr:$dst)>;
-
-// (anyext (setcc_carry)) -> (setcc_carry)
-def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C16r)>;
-def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
- (SETB_C32r)>;
-
-// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
-let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
- (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR16:$src1, GR16:$src2),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or_is_add GR32:$src1, GR32:$src2),
- (ADD32rr GR32:$src1, GR32:$src2)>;
-} // AddedComplexity
+// Decimal Adjust AL after Addition
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
+// Decimal Adjust AL after Subtraction
+// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>;
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
- (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
- (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
- (IMUL32rr GR32:$src1, GR32:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
- (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
- (IMUL32rm GR32:$src1, addr:$src2)>;
-
-// mul reg, imm
-def : Pat<(mul GR16:$src1, imm:$src2),
- (IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(mul GR32:$src1, imm:$src2),
- (IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
- (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
- (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// reg = mul mem, imm
-def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
- (IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
- (IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
- (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
- (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-
-// Optimize multiply by 2 with EFLAGS result.
-let AddedComplexity = 2 in {
-def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
-}
+// Check Array Index Against Bounds
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In32BitMode]>;
-// Patterns for nodes that do not produce flags, for instructions that do.
-
-// Increment reg.
-def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// Decrement reg.
-def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
-def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
- (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
- (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
- (OR32rm GR32:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
- (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
- (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
- (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
- (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
- (XOR32rm GR32:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
- (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
- (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
- (AND32rm GR32:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
- (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
- (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
- (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
- (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
- (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
+def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
+ "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
+// Subsystems.
//===----------------------------------------------------------------------===//
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// X86-64 Support
-//===----------------------------------------------------------------------===//
+include "X86InstrArithmetic.td"
+include "X86InstrCMovSetCC.td"
+include "X86InstrExtension.td"
+include "X86InstrControl.td"
+include "X86InstrShiftRotate.td"
-include "X86Instr64bit.td"
+// X87 Floating Point Stack.
+include "X86InstrFPStack.td"
-//===----------------------------------------------------------------------===//
// SIMD support (SSE, MMX and AVX)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFragmentsSIMD.td"
-//===----------------------------------------------------------------------===//
// FMA - Fused Multiply-Add support (requires FMA)
-//===----------------------------------------------------------------------===//
-
include "X86InstrFMA.td"
+// SSE, MMX and 3DNow! vector support.
+include "X86InstrSSE.td"
+include "X86InstrMMX.td"
+include "X86Instr3DNow.td"
+
+include "X86InstrVMX.td"
+
+// System instructions.
+include "X86InstrSystem.td"
+
+// Compiler Pseudo Instructions and Pat Patterns
+include "X86InstrCompiler.td"
+
//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
+// Assembler Mnemonic Aliases
//===----------------------------------------------------------------------===//
-include "X86InstrSSE.td"
+def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwd", "cwtd">;
+def : MnemonicAlias<"cdq", "cltd">;
+def : MnemonicAlias<"cwde", "cwtl">;
+def : MnemonicAlias<"cdqe", "cltq">;
+
+// lret maps to lretl; it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretl">;
+
+def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl">;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similarly for "pop %bx".
+def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl">;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"salb", "shlb">;
+def : MnemonicAlias<"salw", "shlw">;
+def : MnemonicAlias<"sall", "shll">;
+def : MnemonicAlias<"salq", "shlq">;
+
+def : MnemonicAlias<"smovb", "movsb">;
+def : MnemonicAlias<"smovw", "movsw">;
+def : MnemonicAlias<"smovl", "movsl">;
+def : MnemonicAlias<"smovq", "movsq">;
+
+def : MnemonicAlias<"ud2a", "ud2">;
+def : MnemonicAlias<"verrw", "verr">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretl">;
+def : MnemonicAlias<"sysret", "sysretl">;
+
+def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove">;
+def : MnemonicAlias<"fcmova", "fcmovnbe">;
+def : MnemonicAlias<"fcmovnae", "fcmovb">;
+def : MnemonicAlias<"fcmovna", "fcmovbe">;
+def : MnemonicAlias<"fcmovae", "fcmovnb">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fldcww", "fldcw">;
+def : MnemonicAlias<"fnstcww", "fnstcw">;
+def : MnemonicAlias<"fnstsww", "fnstsw">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+
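+// CondCodeAlias - builds a MnemonicAlias that rewrites Prefix#OldCond#Suffix
+// into Prefix#NewCond#Suffix; for example, CondCodeAlias<"set", "", "z", "e">
+// maps "setz" onto "sete".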
+class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix)>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b">; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e">; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be">; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae">; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae">; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le">; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge">; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne">; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p">; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np">; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+// Assembler Instruction Aliases
//===----------------------------------------------------------------------===//
-include "X86InstrMMX.td"
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al", (DIV8r GR8 :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al", (DIV8m i8mem :$src)>;
+def : InstAlias<"divw $src, %ax", (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1)>;
+def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
+def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
+def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>;
+def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>;
+def : InstAlias<"fxch", (XCH_F ST1)>;
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+def : InstAlias<"fcompi", (COM_FIPr ST1)>;
+def : InstAlias<"fucom", (UCOM_Fr ST1)>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1)>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1)>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r>;
+defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
+defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
+defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
+defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+
+
+// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
+def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
+def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
+def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>;
+
+// We accept "fnstsw %eax" even though it only writes %ax.
+def : InstAlias<"fnstsw %eax", (FNSTSW8r)>;
+def : InstAlias<"fnstsw %al" , (FNSTSW8r)>;
+def : InstAlias<"fnstsw" , (FNSTSW8r)>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// it is compatible with what GAS does.
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>;
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>;
+def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>;
+def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>;
+def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>;
+def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb %dx", (IN8rr)>;
+def : InstAlias<"inw %dx", (IN16rr)>;
+def : InstAlias<"inl %dx", (IN32rr)>;
+def : InstAlias<"inb $port", (IN8ri i8imm:$port)>;
+def : InstAlias<"inw $port", (IN16ri i8imm:$port)>;
+def : InstAlias<"inl $port", (IN32ri i8imm:$port)>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+
+// For a mov without a suffix between a segment register and memory, prefer the
+// 'l' form. All segment/mem forms are equivalent; this one has the shortest
+// encoding.
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+
+// Match 'movq GR64, MMX' as an alias for movd.
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
+def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+
+// movsd with no operands (as opposed to the SSE scalar move of a double) is an
+// alias for movsl (as in "rep; movsd").
+def : InstAlias<"movsd", (MOVSD)>;
+
+// movsx aliases
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+
+// movzx aliases
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb %dx", (OUT8rr)>;
+def : InstAlias<"outw %dx", (OUT16rr)>;
+def : InstAlias<"outl %dx", (OUT32rr)>;
+def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>;
+def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>;
+def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+
+// shld/shrd op,op -> shld op, op, 1
+def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
+def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
+def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
+def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
+
+def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
+def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
+def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+
+/* FIXME: This is disabled because the asm matcher is currently incapable of
+ * matching a fixed immediate like $1.
+// "shl X, $1" is an alias for "shl X".
+multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
+ def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
+ (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
+}
+
+defm : ShiftRotateByOneAlias<"rcl", "RCL">;
+defm : ShiftRotateByOneAlias<"rcr", "RCR">;
+defm : ShiftRotateByOneAlias<"rol", "ROL">;
+defm : ShiftRotateByOneAlias<"ror", "ROR">;
+FIXME */
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>;
+def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>;
+def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>;
+def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index 11d4179..bb2165a 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,9 @@
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
+// All instructions that use MMX should be in this file, even if they also use
+// SSE.
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -18,58 +21,23 @@
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm - Simple MMX binary operator.
- multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
- (bitconvert
- (load_mmx addr:$src2)))))]>;
- }
-
+ // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
+ // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
let isCommutable = Commutable;
}
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>;
}
- // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
- //
- // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
- // to collapse (bitconvert VT to VT) into its operand.
- //
- multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
- def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
- let isCommutable = Commutable;
- }
- def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
- }
-
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2> {
@@ -89,14 +57,75 @@ let Constraints = "$src1 = $dst" in {
}
}
+/// Unary MMX instructions requiring SSSE3.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopmmx addr:$src))))]>;
+}
+
+/// Binary MMX instructions requiring SSSE3.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64> {
+ let isCommutable = 0 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopmmx addr:$src2))))]>;
+}
+}
+
+/// PALIGN MMX instructions (require SSSE3).
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
+ def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>;
+ def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
+}
+
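+// sse12_cvt_pint - helper for the packed int/FP conversion instructions: 'irr'
+// defines the register-source form and 'irm' the memory-source form, both
+// implemented via the given intrinsic Int.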
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
+
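+// sse12_cvt_pint_3addr - like sse12_cvt_pint, but also takes the destination
+// register as an input operand ($src1) so the conversion result is merged into
+// an existing value.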
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
+
//===----------------------------------------------------------------------===//
-// MMX EMMS & FEMMS Instructions
+// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
[(int_x86_mmx_emms)]>;
-def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
- [(int_x86_mmx_femms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -106,12 +135,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms",
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector GR32:$src)))]>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+ (x86mmx (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1 in
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
@@ -123,42 +152,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[]>;
-let neverHasSideEffects = 1 in
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (bitconvert VR64:$src))]>;
def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
-
+ (bitconvert GR64:$src))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (v1i64 VR64:$src), addr:$dst)]>;
+ [(store (x86mmx VR64:$src), addr:$dst)]>;
def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v1i64 (bitconvert
+ (x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (movl immAllZerosV,
- (v2i64 (scalar_to_vector
- (i64 (bitconvert (v1i64 VR64:$src)))))))]>;
+ (v2i64 (scalar_to_vector
+ (i64 (bitconvert (x86mmx VR64:$src))))))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
@@ -176,34 +204,40 @@ let AddedComplexity = 15 in
def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+ (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>;
let AddedComplexity = 20 in
def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
- (v2i32 (X86vzmovl (v2i32
+ (x86mmx (X86vzmovl (x86mmx
(scalar_to_vector (loadi32 addr:$src))))))]>;
// Arithmetic Instructions
-
+defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>;
+defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>;
+defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>;
// -- Addition
-defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
-defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
-defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
-defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
-
+defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>;
+defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>;
+defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>;
+defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>;
+defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>;
+defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>;
+
+
// -- Subtraction
-defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
-defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
-defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
-defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>;
+defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>;
+defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>;
+defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
@@ -211,16 +245,25 @@ defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>;
+defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>;
+defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>;
+
// -- Multiplication
-defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+let isCommutable = 1 in
+defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw>;
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
@@ -232,23 +275,17 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
-// Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
-defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
-defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>;
+defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>;
+defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>;
+let Constraints = "$src1 = $dst" in
+ defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>;
-let Constraints = "$src1 = $dst" in {
- def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- VR64:$src2)))]>;
- def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
- (load addr:$src2))))]>;
-}
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>;
+defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>;
+defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>;
+defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, 1>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
@@ -270,12 +307,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
-// Shift up / down and insert zero's.
-def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
- (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
- (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
@@ -285,84 +316,19 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
-// Conversion Instructions
-
// -- Unpack Instructions
-let Constraints = "$src1 = $dst" in {
- // Unpack High Packed Data Instructions
- def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckh VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckh VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckh VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-
- // Unpack Low Packed Data Instructions
- def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v8i8 (mmx_unpckl VR64:$src1,
- (bc_v8i8 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v4i16 (mmx_unpckl VR64:$src1,
- (bc_v4i16 (load_mmx addr:$src2)))))]>;
-
- def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
- (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
- def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
- (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
- [(set VR64:$dst,
- (v2i32 (mmx_unpckl VR64:$src1,
- (bc_v2i32 (load_mmx addr:$src2)))))]>;
-}
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+ int_x86_mmx_punpckhbw>;
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+ int_x86_mmx_punpckhwd>;
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+ int_x86_mmx_punpckhdq>;
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+ int_x86_mmx_punpcklbw>;
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+ int_x86_mmx_punpcklwd>;
+defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
+ int_x86_mmx_punpckldq>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
@@ -370,93 +336,80 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
// -- Shuffle Instructions
+defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>;
+
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+ (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
- (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
- (undef)))]>;
+ (int_x86_sse_pshuf_w (load_mmx addr:$src1),
+ imm:$src2))]>;
-// -- Conversion Instructions
-let neverHasSideEffects = 1 in {
-def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
- (ins i64mem:$src),
- "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
- (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
-
-def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
-def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
-} // end neverHasSideEffects
-// Extract / Insert
-def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+// -- Conversion Instructions
+defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+let Constraints = "$src1 = $dst" in {
+ defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+}
+
+// Extract / Insert
+def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
+ [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
(iPTR imm:$src2)))]>;
let Constraints = "$src1 = $dst" in {
- def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32:$src2,i16i8imm:$src3),
+ (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- GR32:$src2,(iPTR imm:$src3))))]>;
- def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ GR32:$src2, (iPTR imm:$src3)))]>;
+
+ def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
- (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR64:$dst,
- (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
- (i32 (anyext (loadi16 addr:$src2))),
- (iPTR imm:$src3))))]>;
+ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3)))]>;
}
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+
// MMX to XMM for vector types
def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
- [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+ [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>;
def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
(v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
@@ -464,14 +417,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
(v2i64 (MOVQI2PQIrm addr:$src))>;
-def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
- (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+def : Pat<(v2i64 (MMX_X86movq2dq
+ (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(v2i64 (MOVDI2PDIrm addr:$src))>;
-// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+// Low word of XMM to MMX.
+def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
+ (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
+
+def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
+ (x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
let Uses = [EDI] in
@@ -483,181 +441,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map zero vector to pxor.
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
- // FIXME: Change encoding to pseudo.
- def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllZerosV))]>;
- def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "",
- [(set VR64:$dst, (v2i32 immAllOnesV))]>;
-}
-
-let Predicates = [HasMMX] in {
- def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
- def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
-}
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Store 64-bit integer vector values.
-def : Pat<(store (v8i8 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v4i16 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2i32 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v1i64 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-
-// Bit convert.
-def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
-
// 64-bit bit convert.
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
+def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
- (MMX_MOVQ2FR64rr VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FR64:$src))),
+def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FR64:$src))),
- (MMX_MOVFR642Qrr FR64:$src)>;
-
-let AddedComplexity = 20 in {
- def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
-}
-
-// Clear top half.
-let AddedComplexity = 15 in {
- def : Pat<(v2i32 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>;
-}
-
-// Patterns to perform canonical versions of vector shuffling.
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
- (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
-}
-let AddedComplexity = 10 in {
- def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
- def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
- def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
- (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-}
-// Some special case PANDN patterns.
-// FIXME: Get rid of these.
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-
-// Move MMX to lower 64-bit of XMM
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
- (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
-
-// Move lower 64-bit of XMM to MMX.
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
-def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))))),
- (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
-
-// Patterns for vector comparisons
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)),
- (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>;
-
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)),
- (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))),
- (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>;
-
-// CMOV* - Used to implement the SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_V1I64 : I<0, Pseudo,
- (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
- "#CMOV_V1I64 PSEUDO!",
- [(set VR64:$dst,
- (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
- EFLAGS)))]>;
-}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index f5466f8..b912949 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -15,43 +15,6 @@
//===----------------------------------------------------------------------===//
-// SSE scalar FP Instructions
-//===----------------------------------------------------------------------===//
-
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let Uses = [EFLAGS], usesCustomInserter = 1 in {
- def CMOV_FR32 : I<0, Pseudo,
- (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
- "#CMOV_FR32 PSEUDO!",
- [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_FR64 : I<0, Pseudo,
- (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
- "#CMOV_FR64 PSEUDO!",
- [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
- EFLAGS))]>;
- def CMOV_V4F32 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V4F32 PSEUDO!",
- [(set VR128:$dst,
- (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2F64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2F64 PSEUDO!",
- [(set VR128:$dst,
- (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
- def CMOV_V2I64 : I<0, Pseudo,
- (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
- "#CMOV_V2I64 PSEUDO!",
- [(set VR128:$dst,
- (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
- EFLAGS)))]>;
-}
-
-//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -82,17 +45,15 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
+ SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, mem_cpat:$src2))]>;
}
@@ -142,17 +103,15 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, RC:$src2))], d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
- !strconcat(SSEVer, !strconcat("_",
- !strconcat(OpcodeStr, FPSizeStr))))
+ [(set RC:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
RC:$src1, (mem_frag addr:$src2)))], d>;
}
@@ -221,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
@@ -403,7 +368,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
string asm_opr> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"s"), asm_opr),
+ !strconcat(base_opc, "s", asm_opr),
[(set RC:$dst,
(mov_frag RC:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
@@ -411,7 +376,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
def PDrm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, f64mem:$src2),
- !strconcat(!strconcat(base_opc,"d"), asm_opr),
+ !strconcat(base_opc, "d", asm_opr),
[(set RC:$dst, (v2f64 (mov_frag RC:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
SSEPackedDouble>, TB, OpSize;
@@ -598,14 +563,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
-multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
-}
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
@@ -618,16 +575,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
-multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
- RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm, Domain d> {
- def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
- def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
-}
-
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
@@ -669,13 +616,11 @@ defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
f32mem, load, "cvtss2si{q}">, XS, REX_W;
-defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD;
-defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si{l}">, XD;
+defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
- REX_W;
let isAsmParserOnly = 1 in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -705,29 +650,6 @@ let Constraints = "$src1 = $dst" in {
"cvtsi2sd">, XD, REX_W;
}
-// Instructions below don't have an AVX form.
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
- f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
- f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
- f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB;
-defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
- f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
- i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
- SSEPackedDouble>, TB, OpSize;
-let Constraints = "$src1 = $dst" in {
- defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
- int_x86_sse_cvtpi2ps,
- i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
-}
-
/// SSE 1 Only
// Aliases for intrinsics
@@ -738,10 +660,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si">, XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si">, XD, VEX;
+ f128mem, load, "cvttsd2si">, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttss2si">, XD, VEX, VEX_W;
+ "cvttsd2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS;
@@ -749,10 +671,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
"cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si">, XD;
+ f128mem, load, "cvttsd2si">, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttss2si{q}">, XD, REX_W;
+ "cvttsd2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
@@ -790,6 +712,9 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
}
+def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+ Requires<[HasAVX]>;
+
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -817,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
}
+def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
+ Requires<[HasAVX]>;
+
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -973,9 +901,13 @@ def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
@@ -990,16 +922,6 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
}
-def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>,
- XS, Requires<[HasSSE2]>;
-def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))]>,
- XS, Requires<[HasSSE2]>;
let isAsmParserOnly = 1 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
@@ -1013,13 +935,13 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
}
-def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
-def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>;
+def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
@@ -1469,9 +1391,11 @@ let AddedComplexity = 10 in {
/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
- def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set GR32:$dst, (Int RC:$src))], d>;
+ def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
}
// Mask creation
@@ -1522,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
[(set FR64:$dst, fpimm0)]>,
Requires<[HasSSE2]>, TB, OpSize;
+def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasAVX]>, TB, OpSize, VEX_4V;
}
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
@@ -1654,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
// single r+r
- [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
// double r+r
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))],
+ [],
// single r+m
- [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))],
+ [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
// double r+m
- [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]]>;
+ []]>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -2170,7 +2094,7 @@ def : Pat<(X86SFence), (SFENCE)>;
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
-// JIT implementatioan, it does not expand the instructions below like
+// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
@@ -2277,6 +2201,10 @@ let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
+def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ []>, XS, Requires<[HasSSE2]>;
+
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
@@ -2606,15 +2534,11 @@ let ExeDomain = SSEPackedInt in {
}
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
def PANDNrm : PDI<0xDF, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
} // Constraints = "$src1 = $dst"
@@ -3009,6 +2933,13 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
// Move Int Doubleword to Single Scalar
@@ -3051,6 +2982,21 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
// Move Scalar Single to Double Int
let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
@@ -3532,18 +3478,6 @@ let Constraints = "$src1 = $dst" in {
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, Intrinsic IntId64> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
-}
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3572,19 +3506,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
- int_x86_ssse3_pabs_b_128>,
- SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
- int_x86_ssse3_pabs_b>;
-
+ int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
- int_x86_ssse3_pabs_w_128>,
- SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
- int_x86_ssse3_pabs_w>;
-
+ int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
- int_x86_ssse3_pabs_d_128>,
- SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
- int_x86_ssse3_pabs_d>;
+ int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3611,20 +3537,6 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, Intrinsic IntId64> {
- let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
@@ -3659,54 +3571,30 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128>,
- SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
- int_x86_ssse3_phadd_w>;
+ int_x86_ssse3_phadd_w_128>;
defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128>,
- SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
- int_x86_ssse3_phadd_d>;
+ int_x86_ssse3_phadd_d_128>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128>,
- SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
- int_x86_ssse3_phadd_sw>;
+ int_x86_ssse3_phadd_sw_128>;
defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128>,
- SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
- int_x86_ssse3_phsub_w>;
+ int_x86_ssse3_phsub_w_128>;
defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128>,
- SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
- int_x86_ssse3_phsub_d>;
+ int_x86_ssse3_phsub_d_128>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128>,
- SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
- int_x86_ssse3_phsub_sw>;
+ int_x86_ssse3_phsub_sw_128>;
defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128>,
- SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
- int_x86_ssse3_pmadd_ub_sw>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8,
- int_x86_ssse3_pshuf_b_128>,
- SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
- int_x86_ssse3_pshuf_b>;
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ int_x86_ssse3_pshuf_b_128>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
- int_x86_ssse3_psign_b_128>,
- SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
- int_x86_ssse3_psign_b>;
+ int_x86_ssse3_psign_b_128>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
- int_x86_ssse3_psign_w_128>,
- SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
- int_x86_ssse3_psign_w>;
+ int_x86_ssse3_psign_w_128>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
- int_x86_ssse3_psign_d_128>,
- SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
- int_x86_ssse3_psign_d>;
+ int_x86_ssse3_psign_d_128>;
}
defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128>,
- SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
- int_x86_ssse3_pmul_hr_sw>;
+ int_x86_ssse3_pmul_hr_sw_128>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3714,19 +3602,17 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+ (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+ (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+ (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass ssse3_palign_mm<string asm> {
- def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
- def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
-}
-
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
@@ -3747,28 +3633,9 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : ssse3_palign<"palignr">,
- ssse3_palign_mm<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">;
let AddedComplexity = 5 in {
-
-def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
-
def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
@@ -3792,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
//===---------------------------------------------------------------------===//
// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+let usesCustomInserter = 1 in {
+def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+}
+
+let Uses = [EAX, ECX, EDX] in
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
+ Requires<[HasSSE3]>;
+let Uses = [ECX, EAX] in
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
+ Requires<[HasSSE3]>;
+
+def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
+def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
+ Requires<[In32BitMode]>;
+def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
+ Requires<[In64BitMode]>;
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -3811,7 +3695,7 @@ let Predicates = [HasSSE2] in
(CVTSS2SDrm addr:$src)>;
// bit_convert
-let Predicates = [HasSSE2] in {
+let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3844,6 +3728,10 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}
+let Predicates = [HasAVX] in {
+ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+}
+
// Move scalar to XMM zero-extended
// movd to XMM register zero-extends
let AddedComplexity = 15 in {
@@ -4017,36 +3905,11 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- VR128:$src2)),
- (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
- (memop addr:$src2))),
- (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
// vector -> vector casts
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
- (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
- (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+ (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
let Predicates = [HasSSE1] in {
@@ -4504,7 +4367,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4512,7 +4375,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
+ def PSm : Ii8<opcps, MRMSrcMem,
(outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4522,7 +4385,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4530,7 +4393,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4543,28 +4406,28 @@ multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
- def PSr : SS4AIi8<opcps, MRMSrcReg,
+ def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm_AVX : Ii8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
- def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
- def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -4576,7 +4439,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
// Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4587,7 +4450,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4599,7 +4462,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4610,7 +4473,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
// Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -4625,28 +4488,28 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
string OpcodeStr> {
// Intrinsic operation, reg.
- def SSr : SS4AIi8<opcss, MRMSrcReg,
+ def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
+ def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, reg.
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
// Intrinsic operation, mem.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -4743,6 +4606,29 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
+def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
+def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
+
+def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
+def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
+
+def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
+def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
+
+
+
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
Intrinsic IntId128> {
@@ -4981,6 +4867,9 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
+ (PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -5032,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
imm:$src3))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
VR128:$src1, (load addr:$src2), imm:$src3))]>;
}
@@ -5068,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in {
// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
- def REG : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ def REG : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
- def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
+ def MEM : PseudoI<(outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}
@@ -5555,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -5562,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
@@ -5673,19 +5596,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
-
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(MOVDDUPrm addr:$src)>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(MOVDDUPrm addr:$src)>;
def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
@@ -5700,6 +5618,7 @@ def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(MOVDDUPrm addr:$src)>;
+
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
@@ -5724,9 +5643,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5735,9 +5654,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKHPD
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+ (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+ (UNPCKHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
@@ -5812,10 +5731,18 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+// FIXME: Instead of X86Movddup, there should be an X86Unpcklpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold because
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v2f64 (X86Movddup VR128:$src)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
+
// Shuffle with MOVLHPD
def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold because
+// it has two uses through a bitcast. One use disappears at isel time and the
@@ -5878,31 +5805,18 @@ def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
(MOVSLDUPrm addr:$src)>;
// Shuffle with PSHUFHW
-def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
(PSHUFHWri VR128:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
(PSHUFHWmi addr:$src, imm:$imm)>;
// Shuffle with PSHUFLW
-def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
(PSHUFLWri VR128:$src, imm:$imm)>;
def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
(PSHUFLWmi addr:$src, imm:$imm)>;
// Shuffle with PALIGN
-def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
- (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
-
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
@@ -5920,6 +5834,15 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
def : Pat<(X86Movlps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of an X86Movlps there should be an X86Movsd here, the problem
+// is during lowering, where it's not possible to recognize the load fold because
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+
+def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
new file mode 100644
index 0000000..8278568
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -0,0 +1,746 @@
+//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the shift and rotate instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: Someone needs to smear multipattern goodness all over this file.
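A sketch of the "multipattern goodness" the FIXME above asks for, illustrative only and not part of this patch; it assumes the Ii8 format class and GR* register classes defined elsewhere in the backend, and omits OpSize/REX.W handling for brevity. A multiclass can stamp out the register-by-immediate variants from one description:

multiclass ShiftRI<Format f, string m, SDNode node, RegisterClass RC> {
  // One register-by-immediate shift definition, parameterized over the
  // ModRM form, mnemonic, DAG node and register class.
  def ri : Ii8<0xC1, f, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
               !strconcat(m, "\t{$src2, $dst|$dst, $src2}"),
               [(set RC:$dst, (node RC:$src1, (i8 imm:$src2)))]>;
}
defm SHL32 : ShiftRI<MRM4r, "shl{l}", shl, GR32>;   // would yield SHL32ri
defm SAR32 : ShiftRI<MRM7r, "sar{l}", sra, GR32>;   // would yield SAR32ri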
+
+let Defs = [EFLAGS] in {
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+
+// NOTE: We don't include patterns for shifts of a register by one, because
+// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
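The shift-by-one Pat pattern this note refers to lives elsewhere in the backend; as an illustrative sketch (not taken from this patch), the 32-bit case amounts to rewriting a left shift by one as an add of the register with itself:

// Select shl-by-1 as "add reg,reg" instead of using SHL32r1.
def : Pat<(shl GR32:$src1, (i8 1)),
          (ADD32rr GR32:$src1, GR32:$src1)>;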
+def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
+ "shl{b}\t$dst", []>;
+def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
+ "shl{w}\t$dst", []>, OpSize;
+def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
+ "shl{l}\t$dst", []>;
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
+ "shl{q}\t$dst", []>;
+} // isConvertibleToThreeAddress = 1
+} // Constraints = "$src1 = $dst"
+
+
+// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern
+// using CL?
+let Uses = [CL] in {
+def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift right by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+
+let Uses = [CL] in {
+def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Shift by 1
+def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Rotate instructions
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t$dst", []>;
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t$dst", []>;
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t$dst", []>;
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t$dst", []>;
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+let Uses = [CL] in
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t$dst", []>;
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t$dst", []>;
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+let Uses = [CL] in
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+
+} // Constraints = "$src1 = $dst"
+
+def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t$dst", []>;
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t$dst", []>, OpSize;
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t$dst", []>;
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t$dst", []>;
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t$dst", []>;
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t$dst", []>, OpSize;
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t$dst", []>;
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t$dst", []>;
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+
+let Uses = [CL] in {
+def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
+ "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
+ "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
+ "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
+ "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+
+def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
+ "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
+ "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
+ "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
+ "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+}
+
+let Constraints = "$src1 = $dst" in {
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+ "rol{b}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+ "rol{w}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ OpSize;
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+ "rol{l}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+ "rol{q}\t{$src1, $dst|$dst, $src1}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+
+// Rotate by 1
+def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+ (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // Constraints = "$src1 = $dst"
+
+let Uses = [CL] in {
+def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+}
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+// Rotate by 1
+def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+
+//===----------------------------------------------------------------------===//
+// Double shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in {
+
+let Uses = [CL] in {
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ TB;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+}
+} // Constraints = "$src1 = $dst"
+
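The sense in which these double shifts generalize rotates: shifting a register against itself reproduces a rotate. As an illustrative sketch only (no such pattern is part of this patch), a 32-bit rotl could equally be selected through SHLD32rri8 defined just above:

// (x << s) | (x >> (32 - s)) == rotl x, s when both SHLD operands are x.
def : Pat<(rotl GR32:$src1, (i8 imm:$shamt)),
          (SHLD32rri8 GR32:$src1, GR32:$src1, imm:$shamt)>;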
+let Uses = [CL] in {
+def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+
+def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+
+def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+
+def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+} // Defs = [EFLAGS]
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
new file mode 100644
index 0000000..1a58ba0
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -0,0 +1,390 @@
+//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instructions that are generally used in
+// privileged modes. These are not typically used by the compiler, but are
+// supported for the assembler and disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, RDX] in
+ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+
+let Defs = [RAX, RCX, RDX] in
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+
+// CPU flow control instructions
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+ def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
+}
+
+def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
+def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
+
+// Interrupt and SysCall Instructions.
+let Uses = [EFLAGS] in
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
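For reference, and as an assumption about the surrounding tree rather than part of this patch: the int_x86_int record these patterns select from is the llvm.x86.int intrinsic, declared in IntrinsicsX86.td along the lines of:

// Raw software interrupt: takes the interrupt vector as an i8 immediate.
def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;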
+
+def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
+def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
+def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+ Requires<[In64BitMode]>;
+
+def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
+
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In32BitMode]>;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+ Requires<[In64BitMode]>;
+
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
+def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
+ Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions.
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>;
+def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize;
+def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from debug registers
+
+def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Moves to and from control registers
+
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Segment override instruction prefixes
+
+def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Moves to and from segment registers.
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
+
+def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
+ "ltr{w}\t{$src}", []>, TB;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
+ "ltr{w}\t{$src}", []>, TB;
+
+def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
+ "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins),
+ "push{l}\t%cs", []>, Requires<[In32BitMode]>;
+def PUSHSS16 : I<0x16, RawFrm, (outs), (ins),
+ "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHSS32 : I<0x16, RawFrm, (outs), (ins),
+ "push{l}\t%ss", []>, Requires<[In32BitMode]>;
+def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins),
+ "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins),
+ "push{l}\t%ds", []>, Requires<[In32BitMode]>;
+def PUSHES16 : I<0x06, RawFrm, (outs), (ins),
+ "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize;
+def PUSHES32 : I<0x06, RawFrm, (outs), (ins),
+ "push{l}\t%es", []>, Requires<[In32BitMode]>;
+
+def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins),
+ "push{w}\t%fs", []>, OpSize, TB;
+def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins),
+ "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>;
+def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins),
+ "push{w}\t%gs", []>, OpSize, TB;
+def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins),
+ "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>;
+
+def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins),
+ "push{q}\t%fs", []>, TB;
+def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins),
+ "push{q}\t%gs", []>, TB;
+
+// No "pop cs" instruction.
+def POPSS16 : I<0x17, RawFrm, (outs), (ins),
+ "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>;
+def POPSS32 : I<0x17, RawFrm, (outs), (ins),
+ "pop{l}\t%ss", []> , Requires<[In32BitMode]>;
+
+def POPDS16 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>;
+def POPDS32 : I<0x1F, RawFrm, (outs), (ins),
+ "pop{l}\t%ds", []> , Requires<[In32BitMode]>;
+
+def POPES16 : I<0x07, RawFrm, (outs), (ins),
+ "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>;
+def POPES32 : I<0x07, RawFrm, (outs), (ins),
+ "pop{l}\t%es", []> , Requires<[In32BitMode]>;
+
+def POPFS16 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{w}\t%fs", []>, OpSize, TB;
+def POPFS32 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>;
+def POPFS64 : I<0xa1, RawFrm, (outs), (ins),
+ "pop{q}\t%fs", []>, TB;
+
+def POPGS16 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{w}\t%gs", []>, OpSize, TB;
+def POPGS32 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>;
+def POPGS64 : I<0xa9, RawFrm, (outs), (ins),
+ "pop{q}\t%gs", []>, TB;
+
+
+def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lds{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lss{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "les{l}\t{$src, $dst|$dst, $src}", []>;
+
+def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
+ "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
+ "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB;
+
+def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
+ "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
+ "verr\t$seg", []>, TB;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
+ "verw\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
+ "verw\t$seg", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Descriptor-table support instructions
+
+def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
+ "sgdt\t$dst", []>, TB;
+def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>;
+def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins),
+ "sidt\t$dst", []>, TB;
+def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB, OpSize;
+def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{w}\t$dst", []>, TB;
+def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
+ "sldt{l}\t$dst", []>, TB;
+
+// LLDT is not interpreted specially in 64-bit mode because there is no sign
+// extension.
+def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins),
+ "sldt{q}\t$dst", []>, TB;
+
+def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src),
+ "lgdt\t$src", []>, TB;
+def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>;
+def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src),
+ "lidt\t$src", []>, TB;
+def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
+ "lldt{w}\t$src", []>, TB;
+def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
+ "lldt{w}\t$src", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Specialized register support
+def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
+def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
+
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+ "smsw{w}\t$dst", []>, OpSize, TB;
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+ "smsw{l}\t$dst", []>, TB;
+// no m form encodable; use SMSW16m
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+ "smsw{q}\t$dst", []>, TB;
+
+// For memory operands, there is only a 16-bit form
+def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins),
+ "smsw{w}\t$dst", []>, TB;
+
+def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
+ "lmsw{w}\t$src", []>, TB;
+def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
+ "lmsw{w}\t$src", []>, TB;
+
+def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// Cache instructions
+def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
+def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+
diff --git a/contrib/llvm/lib/Target/X86/X86InstrVMX.td b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
new file mode 100644
index 0000000..daf61e4
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrVMX.td
@@ -0,0 +1,54 @@
+//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the Intel VMX instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VMX instructions
+
+// 66 0F 38 80
+def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8;
+// 66 0F 38 81
+def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8;
+// 0F 01 C1
+def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
+def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 C2
+def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
+// 0F 01 C3
+def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
+def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
+ "vmptrld\t$vmcs", []>, TB;
+def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
+ "vmptrst\t$vmcs", []>, TB;
+def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// 0F 01 C4
+def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
+def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
+ "vmxon\t{$vmxon}", []>, XS;
+
diff --git a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
index 6f0a8d9..3f88fa6 100644
--- a/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86JITInfo.cpp
@@ -19,7 +19,7 @@
#include "llvm/Function.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/System/Valgrind.h"
+#include "llvm/Support/Valgrind.h"
#include <cstdlib>
#include <cstring>
using namespace llvm;
@@ -127,9 +127,17 @@ extern "C" {
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
+#ifdef _WIN64
+ "subq $32, %rsp\n"
+ "movq %rbp, %rcx\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rdx\n"
+ "call " ASMPREFIX "X86CompilationCallback2\n"
+ "addq $32, %rsp\n"
+#else
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
"call " ASMPREFIX "X86CompilationCallback2\n"
+#endif
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
@@ -333,11 +341,11 @@ extern "C" {
extern "C" {
#if !(defined (X86_64_JIT) && defined(_MSC_VER))
// the following function is called only from this translation unit,
- // unless we are under 64bit Windows with MSC, where there is
+ // unless we are under 64bit Windows with MSC, where there is
// no support for inline assembly
static
#endif
-void ATTRIBUTE_USED
+void LLVM_ATTRIBUTE_USED
X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
intptr_t *RetAddrLoc = &StackPtr[1];
assert(*RetAddrLoc == RetAddr &&
@@ -462,7 +470,7 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() {
void *X86JITInfo::emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
- // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
// complains about casting a function pointer to a normal pointer.
#if defined (X86_32_JIT) && !defined (_MSC_VER)
bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback &&
diff --git a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
index 36badb4..6686214 100644
--- a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
enum AsmWriterFlavorTy {
@@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
+ ExceptionsType = ExceptionHandling::DwarfTable;
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::Dwarf;
-
+ ExceptionsType = ExceptionHandling::DwarfTable;
+
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
@@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
- return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata(), false);
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
index 9564fe0..e6dc74e 100644
--- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-emitter"
+#define DEBUG_TYPE "mccodeemitter"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86FixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -37,27 +38,6 @@ public:
~X86MCCodeEmitter() {}
- unsigned getNumFixupKinds() const {
- return 5;
- }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[] = {
- { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
- { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCCodeEmitter::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
@@ -170,41 +150,77 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
- switch (Size) {
- default: assert(0 && "Unknown immediate size");
- case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
- case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
- case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
- case 8: assert(!isPCRel); return FK_Data_8;
- }
+ return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+ return true;
+ return false;
}
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
+/// of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+ Expr = BE->getLHS();
+ }
+
+ if (Expr->getKind() != MCExpr::SymbolRef)
+ return false;
+
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const MCSymbol &S = Ref->getSymbol();
+ return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
- // If this is a simple integer displacement that doesn't require a relocation,
- // emit it now.
+ const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
- // FIXME: is this right for pc-rel encoding?? Probably need to emit this as
- // a fixup if so.
- EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
- return;
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (FixupKind != FK_PCRel_1 &&
+ FixupKind != FK_PCRel_2 &&
+ FixupKind != FK_PCRel_4) {
+ EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+ return;
+ }
+ Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+ } else {
+ Expr = DispOp.getExpr();
}
// If we have an immoffset, add it to the expression.
- const MCExpr *Expr = DispOp.getExpr();
+ if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ ImmOffset = CurByte;
+ }
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
+ if (FixupKind == FK_PCRel_4 ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
ImmOffset -= 4;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+ if (FixupKind == FK_PCRel_2)
ImmOffset -= 2;
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
+ if (FixupKind == FK_PCRel_1)
ImmOffset -= 1;
if (ImmOffset)
@@ -221,10 +237,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
- const MCOperand &Disp = MI.getOperand(Op+3);
- const MCOperand &Base = MI.getOperand(Op);
- const MCOperand &Scale = MI.getOperand(Op+1);
- const MCOperand &IndexReg = MI.getOperand(Op+2);
+ const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+ const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
unsigned BaseReg = Base.getReg();
// Handle %rip relative addressing.
@@ -238,8 +254,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// movq loads are handled with a special relocation form which allows the
// linker to eliminate some loads for GOT references which end up in the
// same linkage unit.
- if (MI.getOpcode() == X86::MOV64rm ||
- MI.getOpcode() == X86::MOV64rm_TC)
+ if (MI.getOpcode() == X86::MOV64rm)
FixupKind = X86::reloc_riprel_4byte_movq_load;
// rip-relative addressing is actually relative to the *next* instruction.
@@ -295,7 +310,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
return;
}
@@ -355,7 +371,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
if (ForceDisp8)
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ Fixups);
}
/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
@@ -708,14 +725,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags & X86II::Op0Mask) == X86II::REP)
EmitByte(0xF3, CurByte, OS);
+ // Emit the address size opcode prefix as needed.
+ if ((TSFlags & X86II::AdSize) ||
+ (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+ EmitByte(0x67, CurByte, OS);
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
- // Emit the address size opcode prefix as needed.
- if (TSFlags & X86II::AdSize)
- EmitByte(0x67, CurByte, OS);
-
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
@@ -806,6 +824,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
+
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -815,7 +834,12 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else
EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
@@ -828,6 +852,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
break;
+ case X86II::RawFrmImm8:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ break;
case X86II::RawFrmImm16:
EmitByte(BaseOpcode, CurByte, OS);
EmitImmediate(MI.getOperand(CurOp++),
@@ -963,12 +994,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
RegNum |= GetX86RegNum(MO) << 4;
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
- } else
+ } else {
+ unsigned FixupKind;
+ // FIXME: Is there a better way to know that we need a signed relocation?
+ if (MI.getOpcode() == X86::MOV64ri32 ||
+ MI.getOpcode() == X86::MOV64mi32 ||
+ MI.getOpcode() == X86::PUSH64i32)
+ FixupKind = X86::reloc_signed_4byte;
+ else
+ FixupKind = getImmFixupKind(TSFlags);
EmitImmediate(MI.getOperand(CurOp++),
- X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
CurByte, OS, Fixups);
+ }
}
+ if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
+
#ifndef NDEBUG
// FIXME: Verify.
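As a sanity check on the pc-relative bias above (the ImmOffset -= 4/2/1 adjustments in EmitImmediate), here is a dependency-free C++ sketch with assumed example addresses. It shows that a fixup recorded at the start of the immediate field, pre-biased by the field size, yields the same displacement the CPU expects relative to the end of the instruction; this is illustrative arithmetic, not LLVM code.

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed example: a 5-byte CALL rel32 at 0x1000 targeting 0x2000.
  uint64_t InstStart = 0x1000;
  unsigned OpcodeBytes = 1;   // the rel32 field starts after the opcode byte
  unsigned ImmSize = 4;       // FK_PCRel_4
  uint64_t Target = 0x2000;

  // What the hardware wants: displacement relative to the next instruction.
  int32_t HwDisp = int32_t(Target - (InstStart + OpcodeBytes + ImmSize));

  // What the emitter produces: a fixup resolved at the start of the immediate
  // field, pre-biased by -ImmSize (the "ImmOffset -= 4" above).
  int32_t FixupValue = int32_t(Target - (InstStart + OpcodeBytes)) - ImmSize;

  printf("%d %d\n", HwDisp, FixupValue); // both print 4091
  return 0;
}
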
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 8c4620f..cbe6db2 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
@@ -38,11 +39,6 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
}
-MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
- return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
- getPICBaseSymbol(&MF, Ctx);
-}
-
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
@@ -154,7 +150,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
// Subtract the pic base.
Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(),
Ctx),
Ctx);
break;
@@ -173,7 +169,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Sym, Ctx);
// Subtract the pic base.
Expr = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
+ MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx),
Ctx);
if (MO.isJTI() && MAI.hasSetDirective()) {
// If .set directive is supported, use it to reduce the number of
@@ -326,8 +322,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
- break;
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
break;
@@ -347,6 +341,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
// Handle a few special cases to eliminate operand modifiers.
+ReSimplify:
switch (OutMI.getOpcode()) {
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
lower_lea64_32mem(&OutMI, 1);
@@ -377,11 +372,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
- case X86::MMX_V_SETALLONES:
- LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
+ case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
@@ -417,6 +411,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::RET);
+ break;
+ }
+
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr:
case X86::TAILJMPd:
@@ -436,6 +437,19 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
}
+ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
+ // this with an ugly goto in case the resultant OR uses EAX and needs the
+ // short form.
+ case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
+ case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
+ case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+ case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
+ case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
+ case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+ case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
+ case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
+ case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -513,6 +527,66 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
+static void LowerTlsAddr(MCStreamer &OutStreamer,
+ X86MCInstLower &MCInstLowering,
+ const MachineInstr &MI) {
+ bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
+ MCContext &context = OutStreamer.getContext();
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+ MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
+ const MCSymbolRefExpr *symRef =
+ MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
+
+ MCInst LEA;
+ if (is64Bits) {
+ LEA.setOpcode(X86::LEA64r);
+ LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest
+ LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(0)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ } else {
+ LEA.setOpcode(X86::LEA32r);
+ LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest
+ LEA.addOperand(MCOperand::CreateReg(0)); // base
+ LEA.addOperand(MCOperand::CreateImm(1)); // scale
+ LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index
+ LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp
+ LEA.addOperand(MCOperand::CreateReg(0)); // seg
+ }
+ OutStreamer.EmitInstruction(LEA);
+
+ if (is64Bits) {
+ MCInst prefix;
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::DATA16_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ prefix.setOpcode(X86::REX64_PREFIX);
+ OutStreamer.EmitInstruction(prefix);
+ }
+
+ MCInst call;
+ if (is64Bits)
+ call.setOpcode(X86::CALL64pcrel32);
+ else
+ call.setOpcode(X86::CALLpcrel32);
+ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
+ MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name);
+ const MCSymbolRefExpr *tlsRef =
+ MCSymbolRefExpr::Create(tlsGetAddr,
+ MCSymbolRefExpr::VK_PLT,
+ context);
+
+ call.addOperand(MCOperand::CreateExpr(tlsRef));
+ OutStreamer.EmitInstruction(call);
+}
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(Mang, *MF, *this);
@@ -532,13 +606,26 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
return;
+
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ // Lower these as normal, but add some comments.
+ unsigned Reg = MI->getOperand(0).getReg();
+ OutStreamer.AddComment(StringRef("eh_return, addr: %") +
+ X86ATTInstPrinter::getRegisterName(Reg));
+ break;
+ }
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
// Lower these as normal, but add some comments.
OutStreamer.AddComment("TAILCALL");
break;
-
+
+ case X86::TLS_addr32:
+ case X86::TLS_addr64:
+ return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
@@ -548,7 +635,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// popl %esi
// Emit the call.
- MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
TmpInst.setOpcode(X86::CALLpcrel32);
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
@@ -586,7 +673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
const MCExpr *PICBase =
- MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext);
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext);
DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext);
DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext),
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.h b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
index 539b09b..0210072 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.h
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.h
@@ -40,8 +40,6 @@ public:
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
- MCSymbol *GetPICBaseSymbol() const;
-
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
diff --git a/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
new file mode 100644
index 0000000..8f3dd32
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86MachObjectWriter.cpp
@@ -0,0 +1,32 @@
+//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+using namespace llvm;
+
+namespace {
+class X86MachObjectWriter : public MCMachObjectTargetWriter {
+public:
+ X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+};
+}
+
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
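The new file follows a common layering idiom: the concrete writer class lives in an anonymous namespace, so only the out-of-line factory function is visible to other translation units. A hedged, dependency-free C++ sketch of that pattern follows; the names here are illustrative stand-ins, not LLVM API.

#include <iostream>
#include <memory>

// Public interface other translation units can see.
struct Writer {
  virtual ~Writer() = default;
  virtual void write() const = 0;
};

namespace {
// Concrete implementation hidden in an anonymous namespace, like
// X86MachObjectWriter above: it has internal linkage, so no other file
// can name it or depend on its layout.
class X86Writer : public Writer {
  bool Is64Bit;
public:
  explicit X86Writer(bool Is64Bit) : Is64Bit(Is64Bit) {}
  void write() const override {
    std::cout << (Is64Bit ? "x86-64 object\n" : "x86-32 object\n");
  }
};
} // end anonymous namespace

// The only exported entry point, mirroring createX86MachObjectWriter.
std::unique_ptr<Writer> createX86Writer(bool Is64Bit) {
  return std::make_unique<X86Writer>(Is64Bit);
}

int main() {
  createX86Writer(/*Is64Bit=*/true)->write();
  return 0;
}
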
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index fedd49e..2f6bd88 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -31,7 +31,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -41,7 +41,7 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<bool>
+cl::opt<bool>
ForceStackAlign("force-align-stack",
cl::desc("Force align the stack to the minimum alignment"
" needed for the function."),
@@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
- StackAlign = TM.getFrameInfo()->getStackAlignment();
+ StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Is64Bit) {
SlotSize = 8;
@@ -159,46 +159,21 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
- case X86::ES:
- return 0;
- case X86::CS:
- return 1;
- case X86::SS:
- return 2;
- case X86::DS:
- return 3;
- case X86::FS:
- return 4;
- case X86::GS:
- return 5;
-
- case X86::CR0:
- return 0;
- case X86::CR1:
- return 1;
- case X86::CR2:
- return 2;
- case X86::CR3:
- return 3;
- case X86::CR4:
- return 4;
-
- case X86::DR0:
- return 0;
- case X86::DR1:
- return 1;
- case X86::DR2:
- return 2;
- case X86::DR3:
- return 3;
- case X86::DR4:
- return 4;
- case X86::DR5:
- return 5;
- case X86::DR6:
- return 6;
- case X86::DR7:
- return 7;
+ case X86::ES: return 0;
+ case X86::CS: return 1;
+ case X86::SS: return 2;
+ case X86::DS: return 3;
+ case X86::FS: return 4;
+ case X86::GS: return 5;
+
+ case X86::CR0: case X86::CR8 : case X86::DR0: return 0;
+ case X86::CR1: case X86::CR9 : case X86::DR1: return 1;
+ case X86::CR2: case X86::CR10: case X86::DR2: return 2;
+ case X86::CR3: case X86::CR11: case X86::DR3: return 3;
+ case X86::CR4: case X86::CR12: case X86::DR4: return 4;
+ case X86::CR5: case X86::CR13: case X86::DR5: return 5;
+ case X86::CR6: case X86::CR14: case X86::DR6: return 6;
+ case X86::CR7: case X86::CR15: case X86::DR7: return 7;
// Pseudo index registers are equivalent to a "none"
// scaled index (See Intel Manual 2A, table 2-3)
@@ -295,9 +270,14 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
break;
case X86::sub_32bit:
- if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
+ if (B == &X86::GR32RegClass) {
if (A->getSize() == 8)
return A;
+ } else if (B == &X86::GR32_NOSPRegClass) {
+ if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
+ return &X86::GR64_NOSPRegClass;
+ if (A->getSize() == 8)
+ return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
} else if (B == &X86::GR32_ABCDRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -336,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64RegClass;
return &X86::GR32RegClass;
- case 1: // Normal GRPs except the stack pointer (for encoding reasons).
+ case 1: // Normal GPRs except the stack pointer (for encoding reasons).
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
+ case 2: // Available for tailcall (not callee-saved GPRs).
+ if (TM.getSubtarget<X86Subtarget>().isTargetWin64())
+ return &X86::GR64_TCW64RegClass;
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ return &X86::GR64_TCRegClass;
+ return &X86::GR32_TCRegClass;
}
}
@@ -408,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
// Set the stack-pointer register and its aliases as reserved.
Reserved.set(X86::RSP);
Reserved.set(X86::ESP);
@@ -420,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::IP);
// Set the frame-pointer register and its aliases as reserved if needed.
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
Reserved.set(X86::RBP);
Reserved.set(X86::EBP);
Reserved.set(X86::BP);
@@ -445,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineModuleInfo &MMI = MF.getMMI();
-
- return (DisableFramePointerElim(MF) ||
- needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() ||
- MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit());
-}
-
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return (RealignStack &&
@@ -478,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
+
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
return canRealignStack(MF);
-
- return requiresRealignment && canRealignStack(MF);
-}
-bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return requiresRealignment && canRealignStack(MF);
}
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
- if (Reg == FramePtr && hasFP(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (Reg == FramePtr && TFI->hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
}
return false;
}
-int
-X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
- uint64_t StackSize = MFI->getStackSize();
-
- if (needsStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- Offset += SlotSize;
- } else {
- unsigned Align = MFI->getObjectAlignment(FI);
- assert((-(Offset + StackSize)) % Align == 0);
- Align = 0;
- return Offset + StackSize;
- }
- // FIXME: Support tail calls
- } else {
- if (!hasFP(MF))
- return Offset + StackSize;
-
- // Skip the saved EBP.
- Offset += SlotSize;
-
- // Skip the RETADDR move area
- const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- Offset -= TailCallReturnAddrDelta;
- }
-
- return Offset;
-}
-
static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
if (is64Bit) {
if (isInt<8>(Imm))
@@ -561,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool reseveCallFrame = TFI->hasReservedCallFrame(MF);
+ int Opcode = I->getOpcode();
+ bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
+ uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reseveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = 0;
- if (Old->getOpcode() == getCallFrameSetupOpcode()) {
- New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(getSUBriOpcode(Is64Bit, Amount)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- } else {
- assert(Old->getOpcode() == getCallFrameDestroyOpcode());
-
- // Factor out the amount the callee already popped.
- uint64_t CalleeAmt = Old->getOperand(1).getImm();
- Amount -= CalleeAmt;
-
- if (Amount) {
- unsigned Opc = getADDriOpcode(Is64Bit, Amount);
- New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(Amount);
- }
- }
+ if (Amount == 0)
+ return;
+
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = 0;
+ if (Opcode == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
+ StackPtr)
+ .addReg(StackPtr)
+ .addImm(Amount);
+ } else {
+ assert(Opcode == getCallFrameDestroyOpcode());
- if (New) {
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
+ // Factor out the amount the callee already popped.
+ Amount -= CalleeAmt;
- // Replace the pseudo instruction with a new instruction.
- MBB.insert(I, New);
+ if (Amount) {
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
}
}
- } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
- if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
- MachineInstr *Old = I;
- MachineInstr *New =
- BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
- StackPtr)
- .addReg(StackPtr)
- .addImm(CalleeAmt);
+ if (New) {
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction.
MBB.insert(I, New);
}
+
+ return;
}
- MBB.erase(I);
+ if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(CalleeAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+ MBB.insert(I, New);
+ }
}
void
@@ -634,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -650,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else if (AfterFPPop)
BasePtr = StackPtr;
else
- BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+ BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
@@ -660,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FIOffset;
if (AfterFPPop) {
// Tail call jmp happens after FP is popped.
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
const MachineFrameInfo *MFI = MF.getFrameInfo();
- FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea();
+ FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
- FIOffset = getFrameIndexOffset(MF, FrameIndex);
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
@@ -677,710 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-void
-X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
- if (TailCallReturnAddrDelta < 0) {
- // create RETURNADDR area
- // arg
- // arg
- // RETADDR
- // { ...
- // RETADDR area
- // ...
- // }
- // [EBP]
- MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta, true);
- }
-
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- FrameIdx = 0;
- }
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
- const TargetInstrInfo &TII) {
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
- uint64_t Chunk = (1LL << 31) - 1;
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- Offset -= ThisVal;
- }
-}
-
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
- if (MBBI == MBB.begin()) return;
-
- MachineBasicBlock::iterator PI = prior(MBBI);
- unsigned Opc = PI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += PI->getOperand(2).getImm();
- MBB.erase(PI);
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- }
-}
-
-/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
-static
-void mergeSPUpdatesDown(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
- return;
-
- if (MBBI == MBB.end()) return;
-
- MachineBasicBlock::iterator NI = llvm::next(MBBI);
- if (NI == MBB.end()) return;
-
- unsigned Opc = NI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- NI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += NI->getOperand(2).getImm();
- MBB.erase(NI);
- MBBI = NI;
- }
-}
-
-/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
-/// SUB.
-static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
- bool doMergeWithPrevious) {
- if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
- (!doMergeWithPrevious && MBBI == MBB.end()))
- return 0;
-
- MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
- unsigned Opc = PI->getOpcode();
- int Offset = 0;
-
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == StackPtr){
- Offset += PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- Offset -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- if (!doMergeWithPrevious) MBBI = NI;
- }
-
- return Offset;
-}
-
-void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
- MCSymbol *Label,
- unsigned FramePtr) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.empty()) return;
-
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth =
- (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsUp ?
- TD->getPointerSize() : -TD->getPointerSize());
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minumum due to stack growth).
- int64_t MaxOffset = 0;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(I->getFrameIdx()));
-
- // Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
- Offset = MaxOffset - Offset + saveAreaOffset;
-
- // Don't output a new machine move if we're re-saving the frame
- // pointer. This happens when the PrologEpilogInserter has inserted an extra
- // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
- // generates one when frame pointers are used. If we generate a "machine
- // move" for this extra "PUSH", the linker will lose track of the fact that
- // the frame pointer should have the value of the first "PUSH" when it's
- // trying to unwind.
- //
- // FIXME: This looks inelegant. It's possibly correct, but it's covering up
- // another bug. I.e., one where we generate a prolog like this:
- //
- // pushl %ebp
- // movl %esp, %ebp
- // pushl %ebp
- // pushl %esi
- // ...
- //
- // The immediate re-push of EBP is unnecessary. At the least, it's an
- // optimization bug. EBP can be used as a scratch register in certain
- // cases, but probably not when we have a frame pointer.
- if (HasFP && FramePtr == Reg)
- continue;
-
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(Label, CSDst, CSSrc));
- }
-}
-
-/// emitPrologue - Push callee-saved registers onto the stack, which
-/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
-/// space for local variables. Also emit labels used by the exception handler to
-/// generate the exception handling frames.
-void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
- const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
- MachineModuleInfo &MMI = MF.getMMI();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- !Fn->doesNotThrow() || UnwindTablesMandatory;
- uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
- uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
- bool HasFP = hasFP(MF);
- DebugLoc DL;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum SlotSize.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else if (MaxAlign < SlotSize)
- MaxAlign = SlotSize;
- }
-
- // Add RETADDR move area to callee saved frame size.
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta < 0)
- X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
- // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
- // function, and use up to 128 bytes of stack space, don't have a frame
- // pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
- if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
- !needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !Subtarget->isTargetWin64()) { // Win64 has no Red Zone
- uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
- if (HasFP) MinSize += SlotSize;
- StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
- MFI->setStackSize(StackSize);
- } else if (Subtarget->isTargetWin64()) {
- // We need to always allocate 32 bytes as register spill area.
- // FIXME: We might reuse these 32 bytes for leaf functions.
- StackSize += 32;
- MFI->setStackSize(StackSize);
- }
-
- // Insert stack pointer adjustment for later moving of return addr. Only
- // applies to tail call optimized functions where the callee argument stack
- // size is bigger than the callers.
- if (TailCallReturnAddrDelta < 0) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- }
-
- // Mapping for machine moves:
- //
- // DST: VirtualFP AND
- // SRC: VirtualFP => DW_CFA_def_cfa_offset
- // ELSE => DW_CFA_def_cfa
- //
- // SRC: VirtualFP AND
- // DST: Register => DW_CFA_def_cfa_register
- //
- // ELSE
- // OFFSET < 0 => DW_CFA_offset_extended_sf
- // REG < 64 => DW_CFA_offset + Reg
- // ELSE => DW_CFA_offset_extended
-
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
- const TargetData *TD = MF.getTarget().getTargetData();
- uint64_t NumBytes = 0;
- int stackGrowth = -TD->getPointerSize();
-
- if (HasFP) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
-
- // Get the offset of the stack slot for the EBP register, which is
- // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
- // Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
-
- // Save EBP/RBP into the appropriate stack slot.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill);
-
- if (needsFrameMoves) {
- // Mark the place where EBP/RBP was saved.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
- // Define the current CFA rule to use the provided offset.
- if (StackSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- }
-
- // Change the rule for the FramePtr to be an "offset" rule.
- MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
- MachineLocation FPSrc(FramePtr);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
- }
-
- // Update EBP with the new base value...
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr);
-
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
-
- // Define the current CFA to use the EBP/RBP register.
- MachineLocation FPDst(FramePtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
- }
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(FramePtr);
-
- // Realign stack
- if (needsStackRealignment(MF)) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
- StackPtr).addReg(StackPtr).addImm(-MaxAlign);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
- } else {
- NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
- }
-
- // Skip the callee-saved push instructions.
- bool PushedRegs = false;
- int StackOffset = 2 * stackGrowth;
-
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == X86::PUSH32r ||
- MBBI->getOpcode() == X86::PUSH64r)) {
- PushedRegs = true;
- ++MBBI;
-
- if (!HasFP && needsFrameMoves) {
- // Mark callee-saved push instruction.
- MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
- // Define the current CFA rule to use the provided offset.
- unsigned Ptr = StackSize ?
- MachineLocation::VirtualFP : StackPtr;
- MachineLocation SPDst(Ptr);
- MachineLocation SPSrc(Ptr, StackOffset);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- StackOffset += stackGrowth;
- }
- }
-
- DL = MBB.findDebugLoc(MBBI);
-
- // Adjust stack pointer: ESP -= numbytes.
-
- // Windows and cygwin/mingw require a prologue helper routine when allocating
- // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
- // uses __alloca. __alloca and the 32-bit version of __chkstk will probe
- // the stack and adjust the stack pointer in one go. The 64-bit version
- // of __chkstk is only responsible for probing the stack. The 64-bit
- // prologue is responsible for adjusting the stack pointer. Touching the
- // stack at 4K increments is necessary to ensure that the guard pages used
- // by the OS virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 &&
- (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
- // Check, whether EAX is livein for this function.
- bool isEAXAlive = false;
- for (MachineRegisterInfo::livein_iterator
- II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
- unsigned Reg = II->first;
- isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
- }
-
-
- const char *StackProbeSymbol =
- Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- } else {
- // Save EAX
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
-
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes - 4);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MBB.insert(MBBI, MI);
- }
- } else if (NumBytes) {
- // If there is an SUB32ri of ESP immediately before this instruction, merge
- // the two. This can be the case when tail call elimination is enabled and
- // the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
-
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
-
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
- }
-
- if ((NumBytes || PushedRegs) && needsFrameMoves) {
- // Mark end of stack pointer adjustment.
- MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
-
- if (!HasFP && NumBytes) {
- // Define the current CFA rule to use the provided offset.
- if (StackSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP,
- -StackSize + stackGrowth);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- } else {
- // FIXME: Verify & implement for FP
- MachineLocation SPDst(StackPtr);
- MachineLocation SPSrc(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(Label, SPDst, SPSrc));
- }
- }
-
- // Emit DWARF info specifying the offsets of the callee-saved registers.
- if (PushedRegs)
- emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
- }
-}
-
-void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc DL = MBBI->getDebugLoc();
-
- switch (RetOpcode) {
- default:
- llvm_unreachable("Can only insert epilog into returning blocks");
- case X86::RET:
- case X86::RETI:
- case X86::TCRETURNdi:
- case X86::TCRETURNri:
- case X86::TCRETURNmi:
- case X86::TCRETURNdi64:
- case X86::TCRETURNri64:
- case X86::TCRETURNmi64:
- case X86::EH_RETURN:
- case X86::EH_RETURN64:
- break; // These are ok
- }
-
- // Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
- uint64_t MaxAlign = MFI->getMaxAlignment();
- unsigned CSSize = X86FI->getCalleeSavedFrameSize();
- uint64_t NumBytes = 0;
-
- // If we're forcing a stack realignment we can't rely on just the frame
- // info, we need to know the ABI stack alignment as well in case we
- // have a call out. Otherwise just make sure we have some alignment - we'll
- // go with the minimum.
- if (ForceStackAlign) {
- if (MFI->hasCalls())
- MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
- else
- MaxAlign = MaxAlign ? MaxAlign : 4;
- }
-
- if (hasFP(MF)) {
- // Calculate required stack adjustment.
- uint64_t FrameSize = StackSize - SlotSize;
- if (needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
- NumBytes = FrameSize - CSSize;
-
- // Pop EBP.
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
- } else {
- NumBytes = StackSize - CSSize;
- }
-
- // Skip the callee-saved pop instructions.
- MachineBasicBlock::iterator LastCSPop = MBBI;
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- unsigned Opc = PI->getOpcode();
-
- if (Opc != X86::POP32r && Opc != X86::POP64r &&
- !PI->getDesc().isTerminator())
- break;
-
- --MBBI;
- }
-
- DL = MBBI->getDebugLoc();
-
- // If there is an ADD32ri or SUB32ri of ESP immediately before this
- // instruction, merge the two instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
- mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
-
- // If dynamic alloca is used, then reset esp to point to the last callee-saved
- // slot before popping them off. The same applies when the stack was
- // realigned.
- if (needsStackRealignment(MF)) {
- // We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back.
- if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
- MBBI = prior(LastCSPop);
- }
-
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
- } else if (MFI->hasVarSizedObjects()) {
- if (CSSize) {
- unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI =
- addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
- MBB.insert(MBBI, MI);
- } else {
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
- .addReg(FramePtr);
- }
- } else if (NumBytes) {
- // Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
- }
-
- // We're returning from function via eh_return.
- if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
- MBBI = prior(MBB.end());
- MachineOperand &DestAddr = MBBI->getOperand(0);
- assert(DestAddr.isReg() && "Offset should be in register!");
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(DestAddr.getReg());
- } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
- RetOpcode == X86::TCRETURNmi ||
- RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
- RetOpcode == X86::TCRETURNmi64) {
- bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = prior(MBB.end());
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
-
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
- assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
-
- // Incorporate the retaddr area.
- Offset = StackAdj-MaxTCDelta;
- assert(Offset >= 0 && "Offset should never be negative");
-
- if (Offset) {
- // Check for possible merge with preceding ADD instruction.
- Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
- }
-
- // Jump to label or value in register.
- if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
- ? X86::TAILJMPd : X86::TAILJMPd64)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
- ? X86::TAILJMPm : X86::TAILJMPm64));
- for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
- } else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- } else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = prior(MBBI);
- for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
- (X86FI->getTCReturnAddrDelta() < 0)) {
- // Add the return addr area delta back since we are not tail calling.
- int delta = -1*X86FI->getTCReturnAddrDelta();
- MBBI = prior(MBB.end());
-
- // Check for possible merge with preceding ADD instruction.
- delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
- }
-}
-
unsigned X86RegisterInfo::getRARegister() const {
return Is64Bit ? X86::RIP // Should have dwarf #16.
: X86::EIP; // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return hasFP(MF) ? FramePtr : StackPtr;
-}
-
-void
-X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
- // Calculate amount of bytes used for return address storing
- int stackGrowth = (Is64Bit ? -8 : -4);
-
- // Initial state of the frame pointer is esp+stackGrowth.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(StackPtr, stackGrowth);
- Moves.push_back(MachineMove(0, Dst, Src));
-
- // Add return address to move list
- MachineLocation CSDst(StackPtr, stackGrowth);
- MachineLocation CSSrc(getRARegister());
- Moves.push_back(MachineMove(0, CSDst, CSSrc));
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
@@ -1579,13 +820,13 @@ namespace {
// Be over-conservative: scan over all vreg defs and find whether vector
// registers are used. If yes, there is a possibility that vector register
// will be spilled and thus require dynamic stack realignment.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+ for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
FuncInfo->setReserveFP(true);
return true;
}
-
+ }
// Nothing to do
return false;
}
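
For readers following the register-scavenging change above: the hunk replaces the old FirstVirtualRegister/getLastVirtReg scan with the index-based virtual-register API. A minimal sketch of the new idiom, using only the calls visible in the hunk (the wrapper function name is invented for illustration):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical helper mirroring the loop in the hunk above: walk every
// virtual register by dense index and check whether its register class
// demands more alignment than the ABI stack alignment.
static bool anyVRegNeedsRealignment(const MachineRegisterInfo &MRI,
                                    unsigned StackAlignment) {
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i); // index -> vreg number
    if (MRI.getRegClass(Reg)->getAlignment() > StackAlignment)
      return true;                                       // may be spilled; realign
  }
  return false;
}
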
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 527df05..064be64 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -111,14 +111,10 @@ public:
/// register scavenger to determine what registers are free.
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
@@ -129,19 +125,12 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
- unsigned FramePtr) const;
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const { return StackPtr; }
+ // FIXME: Move to FrameInfo
+ unsigned getSlotSize() const { return SlotSize; }
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 95269b1..612fac2 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -1,10 +1,10 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
@@ -34,8 +34,8 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
- // Dwarf numbering is different for 32-bit and 64-bit, and there are
- // variations by target as well. Currently the first entry is for X86-64,
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64,
// second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux
// and debug information on X86-32/Darwin)
@@ -81,7 +81,7 @@ let Namespace = "X86" in {
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
}
def IP : Register<"ip">, DwarfRegNum<[16]>;
-
+
// X86-64 only
let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
@@ -103,8 +103,8 @@ let Namespace = "X86" in {
def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
- def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
-
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
// X86-64 only
def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
@@ -208,7 +208,7 @@ let Namespace = "X86" in {
def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
- def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+ def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
// Status flags register
def EFLAGS : Register<"flags">;
@@ -220,7 +220,7 @@ let Namespace = "X86" in {
def ES : Register<"es">;
def FS : Register<"fs">;
def GS : Register<"gs">;
-
+
// Debug registers
def DR0 : Register<"dr0">;
def DR1 : Register<"dr1">;
@@ -230,8 +230,8 @@ let Namespace = "X86" in {
def DR5 : Register<"dr5">;
def DR6 : Register<"dr6">;
def DR7 : Register<"dr7">;
-
- // Condition registers
+
+ // Control registers
def CR0 : Register<"cr0">;
def CR1 : Register<"cr1">;
def CR2 : Register<"cr2">;
@@ -241,6 +241,13 @@ let Namespace = "X86" in {
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+ def CR9 : Register<"cr9">;
+ def CR10 : Register<"cr10">;
+ def CR11 : Register<"cr11">;
+ def CR12 : Register<"cr12">;
+ def CR13 : Register<"cr13">;
+ def CR14 : Register<"cr14">;
+ def CR15 : Register<"cr15">;
// Pseudo index registers
def EIZ : Register<"eiz">;
@@ -254,10 +261,10 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//
-// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
-// R8B, ... R15B.
+// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
@@ -292,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8], 8,
GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
if (!Subtarget.is64Bit())
// In 32-mode, none of the 8-bit registers aliases EBP or ESP.
return begin() + 8;
- else if (RI->hasFP(MF) || MFI->getReserveFP())
+ else if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SPL or BPL.
return array_endof(X86_GR8_AO_64) - 1;
else
@@ -337,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16,
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return array_endof(X86_GR16_AO_64) - 1;
else
@@ -350,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
return array_endof(X86_GR16_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return begin() + 6;
else
@@ -389,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32,
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return array_endof(X86_GR32_AO_64) - 1;
else
@@ -402,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return begin() + 6;
else
@@ -429,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-3; // If so, don't allocate RIP, RSP or RBP
else
return end()-2; // If not, just don't allocate RIP or RSP
@@ -446,18 +453,16 @@ def GR64 : RegisterClass<"X86", [i64], 64,
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
-}
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32,
- [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
-}
+ [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64,
- [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
-}
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
+ CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
@@ -465,10 +470,8 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
-}
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
-}
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
@@ -493,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
(GR32_TC sub_32bit)];
}
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
+ R8, R9, R11]>;
+
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, AH, CH, DH, BL, BH]> {
@@ -538,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
GR16_NOREXClass::iterator
GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate SP or BP.
return end() - 2;
else
@@ -562,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
GR32_NOREXClass::iterator
GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP / EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate ESP or EBP.
return end() - 2;
else
@@ -587,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
GR64_NOREXClass::iterator
GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RIP, RSP or RBP.
return end() - 3;
else
@@ -629,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
GR32_NOSPClass::iterator
GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (Subtarget.is64Bit()) {
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return array_endof(X86_GR32_NOSP_AO_64) - 1;
else
@@ -642,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
return array_endof(X86_GR32_NOSP_AO_64);
} else {
// Does the function dedicate EBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate EBP.
return begin() + 6;
else
@@ -667,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOSPClass::iterator
GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
if (!Subtarget.is64Bit())
return begin(); // None of these are allocatable in 32-bit.
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
return end()-1; // If so, don't allocate RBP
else
return end(); // If not, any reg in this class is ok.
@@ -695,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const
{
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
// Does the function dedicate RBP to being a frame ptr?
- if (RI->hasFP(MF) || MFI->getReserveFP())
+ if (TFI->hasFP(MF) || MFI->getReserveFP())
// If so, don't allocate RBP.
return end() - 1;
else
@@ -784,7 +790,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32,
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
+def VR64 : RegisterClass<"X86", [x86mmx], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6297a27..42e8193 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -32,10 +32,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
+ MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ // If writing to a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256)
+ return SDValue();
+
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
@@ -133,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Dst, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
@@ -147,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
X86::ECX,
Left, InFlag);
InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
} else if (BytesLeft) {
@@ -161,7 +164,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
+ Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -173,10 +176,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -186,14 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
return SDValue();
- /// If not DWORD aligned, call the library.
- if ((Align & 3) != 0)
+ /// If not DWORD aligned, it is more efficient to call the library. However
+ /// if calling the library is not allowed (AlwaysInline), then soldier on as
+ /// the code generated here is better than the long load-store sequence we
+ /// would otherwise get.
+ if (!AlwaysInline && (Align & 3) != 0)
+ return SDValue();
+
+ // If either pointer is in a segment-relative address space, use the default lowering.
+ if (DstPtrInfo.getAddrSpace() >= 256 ||
+ SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // DWORD aligned
- EVT AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
+ MVT AVT;
+ if (Align & 1)
+ AVT = MVT::i8;
+ else if (Align & 2)
+ AVT = MVT::i16;
+ else if (Align & 4)
+ // DWORD aligned
+ AVT = MVT::i32;
+ else
+ // QWORD aligned
+ AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
@@ -214,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Src, InFlag);
InFlag = Chain.getValue(1);
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
array_lengthof(Ops));
@@ -234,8 +250,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
+ DstPtrInfo.getWithOffset(Offset),
+ SrcPtrInfo.getWithOffset(Offset)));
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
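
The memcpy hunk above replaces the DWORD-only check with an alignment-driven choice of copy unit for REP_MOVS. A rough sketch of that selection, assuming it were factored into a standalone helper (the function name is invented; the MVT values are the ones used in the hunk):

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Illustrative only: pick the widest element type REP_MOVS can use for the
// given alignment, matching the test order in the hunk above.
static MVT pickRepMovsUnit(unsigned Align, bool Is64Bit) {
  if (Align & 1) return MVT::i8;          // odd address: byte copies
  if (Align & 2) return MVT::i16;         // 2-byte aligned: 16-bit copies
  if (Align & 4) return MVT::i32;         // DWORD aligned
  return Is64Bit ? MVT::i64 : MVT::i32;   // QWORD aligned (i64 only in 64-bit mode)
}
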
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
index 4f30f31..d1d66fe 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -39,8 +39,7 @@ public:
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
+ MachinePointerInfo DstPtrInfo) const;
virtual
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
@@ -48,10 +47,8 @@ public:
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
};
}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 0d02e5e..de76856 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -1,4 +1,4 @@
-//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Host.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
@@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+ // FIXME: AVX codegen support is not ready.
+ //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
@@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, X863DNowLevel(NoThreeDNow)
, HasCMov(false)
, HasX86_64(false)
+ , HasPOPCNT(false)
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
@@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
} else {
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
- if (Is64Bit && X86SSELevel < SSE2)
+ if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
X86SSELevel = SSE2;
}
@@ -338,9 +342,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
- // bit targets.
- if (isTargetDarwin() || Is64Bit)
+ // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and
+ // for all 64-bit targets.
+ if (isTargetDarwin() || isTargetLinux() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 0ee91ab..8a119b4 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -65,6 +65,9 @@ protected:
///
bool HasX86_64;
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
@@ -100,7 +103,7 @@ protected:
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
unsigned MaxInlineSizeThreshold;
-
+
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -150,7 +153,10 @@ public:
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAVX() const { return HasAVX; }
+ bool hasXMM() const { return hasSSE1() || hasAVX(); }
+ bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
@@ -160,23 +166,21 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
-
+
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const {
+ bool isTargetELF() const {
return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
}
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
- bool isTargetMingw() const {
- return TargetTriple.getOS() == Triple::MinGW32 ||
- TargetTriple.getOS() == Triple::MinGW64; }
+ bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const {
return isTargetMingw() || isTargetCygwin();
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
return isTargetMingw() || isTargetCygwin() || isTargetWindows();
@@ -186,22 +190,12 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
- bool isTargetWin32() const {
- return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ bool isTargetEnvMacho() const {
+ return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
}
- std::string getDataLayout() const {
- const char *p;
- if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
- else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
- else if (isTargetMingw() || isTargetWindows())
- p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32";
- else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
-
- return std::string(p);
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index ce8636eb..889c824 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
case Triple::Darwin:
return new X86MCAsmInfoDarwin(TheTriple);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return new X86MCAsmInfoCOFF(TheTriple);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return new X86MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoCOFF(TheTriple);
default:
return new X86ELFMCAsmInfo(TheTriple);
}
@@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
MCCodeEmitter *_Emitter,
- bool RelaxAll) {
+ bool RelaxAll,
+ bool NoExecStack) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
case Triple::MinGW32:
- case Triple::MinGW64:
case Triple::Cygwin:
case Triple::Win32:
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ if (TheTriple.getEnvironment() == Triple::MachO)
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ else
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
}
-extern "C" void LLVMInitializeX86Target() {
+extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
@@ -89,28 +94,38 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, false) {
+ : X86TargetMachine(T, TT, FS, false),
+ DataLayout(getSubtargetImpl()->isTargetDarwin() ?
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ (getSubtargetImpl()->isTargetCygMing() ||
+ getSubtargetImpl()->isTargetWindows()) ?
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, true) {
+ : X86TargetMachine(T, TT, FS, true),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this),
+ JITInfo(*this) {
}
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
- DataLayout(Subtarget.getDataLayout()),
- FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(),
- (Subtarget.isTargetWin64() ? -40 :
- (Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
- ELFWriterInfo(*this) {
+ FrameLowering(*this, Subtarget),
+ ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();
// If no relocation model was picked, default as appropriate for the target.
@@ -217,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE) {
// FIXME: Move this to TargetJITInfo!
// On Darwin, do not override 64-bit setting made in X86TargetMachine().
- if (DefRelocModel == Reloc::Default &&
+ if (DefRelocModel == Reloc::Default &&
(!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) {
setRelocationModel(Reloc::Static);
Subtarget.setPICStyle(PICStyles::None);
}
-
+
PM.add(createX86JITCodeEmitterPass(*this, JCE));
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index f9fb424..5973922 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -14,16 +14,17 @@
#ifndef X86TARGETMACHINE_H
#define X86TARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "X86.h"
#include "X86ELFWriterInfo.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86Subtarget.h"
#include "X86ISelLowering.h"
+#include "X86FrameLowering.h"
+#include "X86JITInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -31,12 +32,7 @@ class formatted_raw_ostream;
class X86TargetMachine : public LLVMTargetMachine {
X86Subtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- TargetFrameInfo FrameInfo;
- X86InstrInfo InstrInfo;
- X86JITInfo JITInfo;
- X86TargetLowering TLInfo;
- X86SelectionDAGInfo TSInfo;
+ X86FrameLowering FrameLowering;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -49,20 +45,25 @@ public:
X86TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool is64Bit);
- virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
- virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ llvm_unreachable("getInstrInfo not implemented");
+ }
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ llvm_unreachable("getJITInfo not implemented");
+ }
virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const X86TargetLowering *getTargetLowering() const {
- return &TLInfo;
+ virtual const X86TargetLowering *getTargetLowering() const {
+ llvm_unreachable("getTargetLowering not implemented");
}
virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
- return &TSInfo;
+ llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
- virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
@@ -79,17 +80,53 @@ public:
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, const std::string &M,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ X86InstrInfo InstrInfo;
+ X86SelectionDAGInfo TSInfo;
+ X86TargetLowering TLInfo;
+ X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+ virtual const X86InstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual X86JITInfo *getJITInfo() {
+ return &JITInfo;
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f06dd3..8f06dd3 100644
--- a/contrib/llvm/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
diff --git a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
index 8107e32..b20d71f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreCallingConv.td
@@ -24,6 +24,9 @@ def CC_XCore : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // The 'nest' parameter, if any, is passed in R11.
+ CCIfNest<CCAssignToReg<[R11]>>,
+
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.cpp
deleted file mode 100644
index f50dc96..0000000
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains XCore frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#include "XCore.h"
-#include "XCoreFrameInfo.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// XCoreFrameInfo:
-//===----------------------------------------------------------------------===//
-
-XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm):
- TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0)
-{
- // Do nothing
-}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.h b/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.h
deleted file mode 100644
index 2c67577..0000000
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameInfo.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains XCore frame information that doesn't fit anywhere else
-// cleanly...
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef XCOREFRAMEINFO_H
-#define XCOREFRAMEINFO_H
-
-#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class XCoreFrameInfo: public TargetFrameInfo {
-
- public:
- XCoreFrameInfo(const TargetMachine &tm);
-
- //! Stack slot size (4 bytes)
- static int stackSlotSize() {
- return 4;
- }
- };
-}
-
-#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
new file mode 100644
index 0000000..0578220
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -0,0 +1,387 @@
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// helper functions. FIXME: Eliminate.
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("loadFromStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+
+static void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl,
+ const TargetInstrInfo &TII) {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset))
+ report_fatal_error("storeToStack offset too big " + Twine(Offset));
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameLowering:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0),
+ STI(sti) {
+ // Do nothing
+}
+
+bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
+ return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = &MF.getMMI();
+ const XCoreRegisterInfo *RegInfo =
+ static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ bool FP = hasFP(MF);
+ bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest);
+
+ if (Nested) {
+ loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
+ }
+ bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const XCoreInstrInfo &TII =
+ *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold the epilogue into the return instruction.
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, TRI);
+ if (emitFrameMoves) {
+ MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
+ BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
+ XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const{
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(),
+ RC, TRI);
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+void
+XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (RegInfo->requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the prologue / epilogue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
+}
+
+void XCoreFrameLowering::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
new file mode 100644
index 0000000..7da19f0
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.h
@@ -0,0 +1,59 @@
+//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreSubtarget;
+
+ class XCoreFrameLowering: public TargetFrameLowering {
+ const XCoreSubtarget &STI;
+ public:
+ XCoreFrameLowering(const XCoreSubtarget &STI);
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 755ece7..fc8a07a 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -68,12 +68,9 @@ namespace {
}
// Complex Pattern Selectors.
- bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
- bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
- SDValue &Offset);
+ bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual const char *getPassName() const {
return "XCore DAG->DAG Pattern Instruction Selection";
@@ -91,8 +88,8 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
return new XCoreDAGToDAGISel(TM);
}
-bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
FrameIndexSDNode *FIN = 0;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -113,8 +110,8 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr,
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(0, MVT::i32);
@@ -134,8 +131,8 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr,
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
Base = Addr.getOperand(0);
Offset = CurDAG->getTargetConstant(0, MVT::i32);
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index abe7b2f..828d6f9 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -148,9 +148,13 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-
- maxStoresPerMemset = 4;
- maxStoresPerMemmove = maxStoresPerMemcpy = 2;
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ maxStoresPerMemset = maxStoresPerMemsetOptSize = 4;
+ maxStoresPerMemmove = maxStoresPerMemmoveOptSize
+ = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2;
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::STORE);
@@ -177,6 +181,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -392,24 +397,23 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
}
SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const
-{
+LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ if (allowsUnalignedMemoryAccesses(LD->getMemoryVT()))
return SDValue();
- }
+
unsigned ABIAlignment = getTargetData()->
getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
// Leave aligned load alone.
- if (LD->getAlignment() >= ABIAlignment) {
+ if (LD->getAlignment() >= ABIAlignment)
return SDValue();
- }
+
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Base;
int64_t Offset;
@@ -419,10 +423,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// We've managed to infer better alignment information than the load
// already has. Use an aligned load.
//
- // FIXME: No new alignment information is actually passed here.
- // Should the offset really be 4?
- //
- return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4,
+ return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
+ MachinePointerInfo(),
false, false, 0);
}
// Lower to
@@ -436,40 +438,40 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
- SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
- SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+ SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
- SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
- LowAddr, NULL, 4, false, false, 0);
- SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
- HighAddr, NULL, 4, false, false, 0);
- SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
- SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(), false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(), false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
if (LD->getAlignment() == 2) {
- int SVOffset = LD->getSrcValueOffset();
- SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
- BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, LD->getPointerInfo(), MVT::i16,
LD->isVolatile(), LD->isNonTemporal(), 2);
- SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
- SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
- HighAddr, LD->getSrcValue(), SVOffset + 2,
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ HighAddr,
+ LD->getPointerInfo().getWithOffset(2),
MVT::i16, LD->isVolatile(),
LD->isNonTemporal(), 2);
- SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High,
DAG.getConstant(16, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
// Lower to a call to __misaligned_load(BasePtr).
@@ -486,12 +488,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- Args, DAG, dl);
+ Args, DAG, DL);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, 2, DL);
}
SDValue XCoreTargetLowering::
@@ -515,18 +517,17 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
if (ST->getAlignment() == 2) {
- int SVOffset = ST->getSrcValueOffset();
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
DAG.getConstant(16, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
- ST->getSrcValue(), SVOffset, MVT::i16,
+ ST->getPointerInfo(), MVT::i16,
ST->isVolatile(), ST->isNonTemporal(),
2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
- ST->getSrcValue(), SVOffset + 2,
+ ST->getPointerInfo().getWithOffset(2),
MVT::i16, ST->isVolatile(),
ST->isNonTemporal(), 2);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
@@ -757,16 +758,18 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(1), V, 0, false, false, 0);
+ Node->getOperand(1), MachinePointerInfo(V),
+ false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
DAG.getConstant(VT.getSizeInBits(),
getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
+ false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -778,9 +781,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const
MachineFunction &MF = DAG.getMachineFunction();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
- const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ MachinePointerInfo(), false, false, 0);
}
SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -796,6 +798,64 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
RegInfo->getFrameRegister(MF), MVT::i32);
}
+SDValue XCoreTargetLowering::
+LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ // .align 4
+ // LDAPF_u10 r11, nest
+ // LDW_2rus r11, r11[0]
+ // STWSP_ru6 r11, sp[0]
+ // LDAPF_u10 r11, fptr
+ // LDW_2rus r11, r11[0]
+ // BAU_1r r11
+ // nest:
+ // .word nest
+ // fptr:
+ // .word fptr
+ SDValue OutChains[5];
+
+ SDValue Addr = Trmp;
+
+ DebugLoc dl = Op.getDebugLoc();
+ OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(4, MVT::i32));
+ OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 4), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(8, MVT::i32));
+ OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32),
+ Addr, MachinePointerInfo(TrmpAddr, 8), false,
+ false, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(12, MVT::i32));
+ OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr,
+ MachinePointerInfo(TrmpAddr, 12), false, false,
+ 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(16, MVT::i32));
+ OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr,
+ MachinePointerInfo(TrmpAddr, 16), false, false,
+ 0);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -929,7 +989,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// = Chain, Callee, Reg#1, Reg#2, ...
//
// Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
@@ -1035,7 +1095,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins, CC_XCore);
- unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize();
+ unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
unsigned LRSaveSize = StackSlotSize;
@@ -1068,7 +1128,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > StackSlotSize) {
errs() << "LowerFormalArguments Unhandled argument type: "
- << (unsigned)VA.getLocVT().getSimpleVT().SimpleTy
+ << EVT(VA.getLocVT()).getEVTString()
<< "\n";
}
// Create the frame index object for this incoming parameter...
@@ -1079,7 +1139,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0,
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -1111,8 +1172,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
RegInfo.addLiveIn(ArgRegs[i], VReg);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
// Move argument from virt reg -> stack
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
MemOps.push_back(Store);
}
if (!MemOps.empty())
@@ -1443,9 +1504,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
LD->getBasePtr(),
DAG.getConstant(StoreBits/8, MVT::i32),
- Alignment, false, ST->getSrcValue(),
- ST->getSrcValueOffset(), LD->getSrcValue(),
- LD->getSrcValueOffset());
+ Alignment, false, ST->getPointerInfo(),
+ LD->getPointerInfo());
}
}
break;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index febc198..7e5dd2e 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -147,6 +147,7 @@ namespace llvm {
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::vector<unsigned>
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index ad00046..9cb6a7d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -384,74 +384,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addImm(0);
}
-bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty()) {
- return true;
- }
- MachineFunction *MF = MBB.getParent();
- XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
-
- bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
- it != CSI.end(); ++it) {
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(it->getReg());
-
- unsigned Reg = it->getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true,
- it->getFrameIdx(), RC, &RI);
- if (emitFrameMoves) {
- MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
- XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
- }
- }
- return true;
-}
-
-bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const
-{
- bool AtStart = MI == MBB.begin();
- MachineBasicBlock::iterator BeforeI = MI;
- if (!AtStart)
- --BeforeI;
- for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
- it != CSI.end(); ++it) {
- unsigned Reg = it->getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- loadRegFromStackSlot(MBB, MI, it->getReg(),
- it->getFrameIdx(),
- RC, &RI);
- assert(MI != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert multiple
- // instructions.
- if (AtStart)
- MI = MBB.begin();
- else {
- MI = BeforeI;
- ++MI;
- }
- }
- return true;
-}
-
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool XCoreInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
-{
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 2) &&
"Invalid XCore branch condition!");
Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
index d2b116e..977fe8d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -75,15 +75,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 6b3b39b..38cc734 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -29,11 +29,11 @@ include "XCoreInstrFormats.td"
// Call
def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
- [SDNPHasChain, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOptInGlue]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
@@ -66,9 +66,9 @@ def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOutGlue]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
@@ -610,8 +610,15 @@ def LDC_lru6 : _FLRU6<
[(set GRRegs:$dst, immU16:$b)]>;
}
+def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU6:$val)]>;
+
+def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, immU16:$val)]>;
+
// Operand register - U6
-// TODO setc
let isBranch = 1, isTerminator = 1 in {
defm BRFT: FRU6_LRU6_branch<"bt">;
defm BRBT: FRU6_LRU6_branch<"bt">;
@@ -720,9 +727,8 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"neg $dst, $b",
[(set GRRegs:$dst, (ineg GRRegs:$b))]>;
-// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
-// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
-// tsetmr, sext (reg), zext (reg)
+// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp,
+// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
@@ -748,6 +754,50 @@ def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
"mkmsk $dst, $size",
[(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
+def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
+ "getr $dst, $type",
+ [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+
+def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
+
+def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "outct res[$r], $val",
+ [(int_xcore_outct GRRegs:$r, immUs:$val)]>;
+
+def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "outt res[$r], $val",
+ [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>;
+
+def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "out res[$r], $val",
+ [(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+
+def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "inct $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
+
+def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "int $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>;
+
+def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "in $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+
+def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
+
+def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val),
+ "chkct res[$r], $val",
+ [(int_xcore_chkct GRRegs:$r, immUs:$val)]>;
+
+def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setd res[$r], $val",
+ [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+
// Two operand long
// TODO settw, setclk, setrdy, setpsc, endin, peek,
// getd, testlcl, tinitlr, getps, setps
@@ -763,8 +813,12 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"clz $dst, $src",
[(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setc res[$r], $val",
+ [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+
// One operand short
-// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
+// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp
// setdp, setcp, setv, setev, kcall
// dgetreg
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
@@ -805,6 +859,10 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
[(XCoreBranchLink GRRegs:$addr)]>;
}
+def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
+ "freer res[$r]",
+ [(int_xcore_freer GRRegs:$r)]>;
+
// Zero operand short
// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index f82e598..56c0879 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -21,7 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -84,11 +84,13 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
Reserved.set(XCore::CP);
Reserved.set(XCore::DP);
Reserved.set(XCore::SP);
Reserved.set(XCore::LR);
- if (hasFP(MF)) {
+ if (TFI->hasFP(MF)) {
Reserved.set(XCore::R10);
}
return Reserved;
@@ -96,12 +98,10 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool
XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- // TODO can we estimate stack size?
- return hasFP(MF);
-}
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ // TODO can we estimate stack size?
+ return TFI->hasFP(MF);
}
// This function eliminates ADJCALLSTACKDOWN,
@@ -109,7 +109,9 @@ bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
void XCoreRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!TFI->hasReservedCallFrame(MF)) {
// Turn the adjcallstackdown instruction into 'extsp <amt>' and the
// adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
MachineInstr *Old = I;
@@ -118,14 +120,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = TFI->getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
assert(Amount%4 == 0);
Amount /= 4;
-
+
bool isU6 = isImmU6(Amount);
-
if (!isU6 && !isImmU16(Amount)) {
// FIX could emit multiple instructions in this case.
#ifndef NDEBUG
@@ -172,6 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = FrameOp.getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
int StackSize = MF.getFrameInfo()->getStackSize();
@@ -197,7 +199,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset/=4;
- bool FP = hasFP(MF);
+ bool FP = TFI->hasFP(MF);
unsigned Reg = MI.getOperand(0).getReg();
bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
@@ -292,48 +294,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-void
-XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
- const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
- XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- if (LRUsed) {
- MF.getRegInfo().setPhysRegUnused(XCore::LR);
-
- bool isVarArg = MF.getFunction()->isVarArg();
- int FrameIdx;
- if (! isVarArg) {
- // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
- } else {
- FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
- false);
- }
- XFI->setUsesLR(FrameIdx);
- XFI->setLRSpillSlot(FrameIdx);
- }
- if (requiresRegisterScavenging(MF)) {
- // Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
- if (hasFP(MF)) {
- // A callee save register is used to hold the FP.
- // This needs saving / restoring in the epilogue / prologue.
- XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
-}
-
-void XCoreRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-
-}
-
void XCoreRegisterInfo::
loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DstReg, int64_t Value, DebugLoc dl) const {
@@ -346,229 +306,19 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
}
-void XCoreRegisterInfo::
-storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, int Offset, DebugLoc dl) const {
- assert(Offset%4 == 0 && "Misaligned stack offset");
- Offset/=4;
- bool isU6 = isImmU6(Offset);
- if (!isU6 && !isImmU16(Offset))
- report_fatal_error("storeToStack offset too big " + Twine(Offset));
- int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
- BuildMI(MBB, I, dl, TII.get(Opcode))
- .addReg(SrcReg)
- .addImm(Offset);
-}
-
-void XCoreRegisterInfo::
-loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DstReg, int Offset, DebugLoc dl) const {
- assert(Offset%4 == 0 && "Misaligned stack offset");
- Offset/=4;
- bool isU6 = isImmU6(Offset);
- if (!isU6 && !isImmU16(Offset))
- report_fatal_error("loadFromStack offset too big " + Twine(Offset));
- int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
- BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
- .addImm(Offset);
-}
-
-void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo *MMI = &MF.getMMI();
- XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- bool FP = hasFP(MF);
-
- // Work out frame sizes.
- int FrameSize = MFI->getStackSize();
-
- assert(FrameSize%4 == 0 && "Misaligned frame size");
-
- FrameSize/=4;
-
- bool isU6 = isImmU6(FrameSize);
-
- if (!isU6 && !isImmU16(FrameSize)) {
- // FIXME could emit multiple instructions.
- report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
- }
- bool emitFrameMoves = needsFrameMoves(MF);
-
- // Do we need to allocate space on the stack?
- if (FrameSize) {
- bool saveLR = XFI->getUsesLR();
- bool LRSavedOnEntry = false;
- int Opcode;
- if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
- Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
- MBB.addLiveIn(XCore::LR);
- saveLR = false;
- LRSavedOnEntry = true;
- } else {
- Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
- }
- BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
-
- if (emitFrameMoves) {
- std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-
- // Show update of SP.
- MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
-
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
-
- if (LRSavedOnEntry) {
- MachineLocation CSDst(MachineLocation::VirtualFP, 0);
- MachineLocation CSSrc(XCore::LR);
- Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
- }
- }
- if (saveLR) {
- int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
- storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
- MBB.addLiveIn(XCore::LR);
-
- if (emitFrameMoves) {
- MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
- MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
- MachineLocation CSSrc(XCore::LR);
- MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
- }
- }
- }
-
- if (FP) {
- // Save R10 to the stack.
- int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
- storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
- // R10 is live-in. It is killed at the spill.
- MBB.addLiveIn(XCore::R10);
- if (emitFrameMoves) {
- MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
- MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
- MachineLocation CSSrc(XCore::R10);
- MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
- }
- // Set the FP from the SP.
- unsigned FramePtr = XCore::R10;
- BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
- .addImm(0);
- if (emitFrameMoves) {
- // Show FP is now valid.
- MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
- MachineLocation SPDst(FramePtr);
- MachineLocation SPSrc(MachineLocation::VirtualFP);
- MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- }
- }
-
- if (emitFrameMoves) {
- // Frame moves for callee saved.
- std::vector<MachineMove> &Moves = MMI->getFrameMoves();
- std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
- XFI->getSpillLabels();
- for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
- MCSymbol *SpillLabel = SpillLabels[I].first;
- CalleeSavedInfo &CSI = SpillLabels[I].second;
- int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
- unsigned Reg = CSI.getReg();
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
- }
- }
-}
-
-void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = prior(MBB.end());
- DebugLoc dl = MBBI->getDebugLoc();
-
- bool FP = hasFP(MF);
-
- if (FP) {
- // Restore the stack pointer.
- unsigned FramePtr = XCore::R10;
- BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
- .addReg(FramePtr);
- }
-
- // Work out frame sizes.
- int FrameSize = MFI->getStackSize();
-
- assert(FrameSize%4 == 0 && "Misaligned frame size");
-
- FrameSize/=4;
-
- bool isU6 = isImmU6(FrameSize);
-
- if (!isU6 && !isImmU16(FrameSize)) {
- // FIXME could emit multiple instructions.
- report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize));
- }
-
- if (FrameSize) {
- XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
-
- if (FP) {
- // Restore R10
- int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
- FPSpillOffset += FrameSize*4;
- loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl);
- }
- bool restoreLR = XFI->getUsesLR();
- if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
- int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
- LRSpillOffset += FrameSize*4;
- loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl);
- restoreLR = false;
- }
- if (restoreLR) {
- // Fold prologue into return instruction
- assert(MBBI->getOpcode() == XCore::RETSP_u6
- || MBBI->getOpcode() == XCore::RETSP_lu6);
- int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
- BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
- MBB.erase(MBBI);
- } else {
- int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
- BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
- }
- }
-}
-
int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- bool FP = hasFP(MF);
-
- return FP ? XCore::R10 : XCore::SP;
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
}
unsigned XCoreRegisterInfo::getRARegister() const {
return XCore::LR;
}
-void XCoreRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
- const {
- // Initial state of the frame pointer is SP.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(XCore::SP, 0);
- Moves.push_back(MachineMove(0, Dst, Src));
-}
-
#include "XCoreGenRegisterInfo.inc"
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index e636c1c..2185755 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -48,8 +48,6 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
@@ -57,18 +55,9 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
-
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const;
- void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//! Return the array of argument passing registers
/*!
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
index 62daf5d..765f717 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.td
@@ -61,8 +61,8 @@ def GRRegs : RegisterClass<"XCore", [i32], 32,
GRRegsClass::iterator
GRRegsClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
- if (RI->hasFP(MF))
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
+ if (TFI->hasFP(MF))
return end()-1; // don't allocate R10
else
return end();
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index b0013eb..30da2c8 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -27,7 +27,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
- FrameInfo(*this),
+ FrameLowering(Subtarget),
TLInfo(*this),
TSInfo(*this) {
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
index 14073ba..24daadc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h
@@ -16,7 +16,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
-#include "XCoreFrameInfo.h"
+#include "XCoreFrameLowering.h"
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
@@ -28,7 +28,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
XCoreInstrInfo InstrInfo;
- XCoreFrameInfo FrameInfo;
+ XCoreFrameLowering FrameLowering;
XCoreTargetLowering TLInfo;
XCoreSelectionDAGInfo TSInfo;
public:
@@ -36,7 +36,9 @@ public:
const std::string &FS);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const XCoreFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual const XCoreTargetLowering *getTargetLowering() const {
return &TLInfo;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index cdf5a53..7f4e1c1 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -19,31 +20,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
DataSection =
- Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
- MCSectionELF::XCORE_SHF_DP_SECTION,
- SectionKind::getDataRel(), false);
+ Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getDataRel());
BSSSection =
- Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE |
- MCSectionELF::XCORE_SHF_DP_SECTION,
- SectionKind::getBSS(), false);
+ Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::XCORE_SHF_DP_SECTION,
+ SectionKind::getBSS());
MergeableConst4Section =
- Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst4(), false);
+ Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst4());
MergeableConst8Section =
- Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst8(), false);
+ Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst8());
MergeableConst16Section =
- Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE |
- MCSectionELF::XCORE_SHF_CP_SECTION,
- SectionKind::getMergeableConst16(), false);
+ Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getMergeableConst16());
// TLS globals are lowered in the backend to arrays indexed by the current
// thread id. After lowering they require no special handling by the linker
@@ -52,10 +53,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
TLSBSSSection = BSSSection;
ReadOnlySection =
- Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::XCORE_SHF_CP_SECTION,
- SectionKind::getReadOnlyWithRel(), false);
+ Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::XCORE_SHF_CP_SECTION,
+ SectionKind::getReadOnlyWithRel());
// Dynamic linking is not supported. Data with relocations is placed in the
// same section as data without relocations.
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0c77e1f..0c650cf 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,7 +39,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -67,7 +66,9 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {}
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +85,12 @@ namespace {
}
char ArgPromotion::ID = 0;
-INITIALIZE_PASS(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false);
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
@@ -130,47 +135,74 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
if (PointerArgs.empty()) return 0;
// Second check: make sure that all callers are direct callers. We can't
- // transform functions that have indirect callers.
- if (F->hasAddressTaken())
- return 0;
-
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
+ UI != E; ++UI) {
+ CallSite CS(*UI);
+ // Must be a direct call.
+ if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
+
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
SmallPtrSet<Argument*, 8> ArgsToPromote;
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
for (unsigned i = 0; i != PointerArgs.size(); ++i) {
bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);
+ Argument *PtrArg = PointerArgs[i].first;
+ const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe.
- Argument *PtrArg = PointerArgs[i].first;
if (isByVal) {
- const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
DEBUG(dbgs() << "argpromotion disable promoting argument '"
<< PtrArg->getName() << "' because it would require adding more"
<< " than " << maxElements << " arguments to the function.\n");
- } else {
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- if (!STy->getElementType(i)->isSingleValueType()) {
- AllSimple = false;
- break;
- }
-
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow scalarrepl to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
+ continue;
+ }
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (!STy->getElementType(i)->isSingleValueType()) {
+ AllSimple = false;
+ break;
}
}
+
+ // Safe to transform, don't even bother trying to "promote" it.
+ // Passing the elements as a scalar will allow scalarrepl to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
+ }
}
}
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (STy->getElementType(i) == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
+ }
+ }
+
// Otherwise, see if we can promote the pointer to its value.
if (isSafeToPromoteArgument(PtrArg, isByVal))
ArgsToPromote.insert(PtrArg);
@@ -183,22 +215,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
-/// IsAlwaysValidPointer - Return true if the specified pointer is always legal
-/// to load.
-static bool IsAlwaysValidPointer(Value *V) {
- if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V))
- return IsAlwaysValidPointer(GEP->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return IsAlwaysValidPointer(CE->getOperand(0));
-
- return false;
-}
-
-/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
+/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
unsigned ArgNo = std::distance(Callee->arg_begin(),
@@ -211,7 +230,7 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(*UI);
assert(CS && "Should only have direct calls!");
- if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
return false;
}
return true;
@@ -318,7 +337,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -434,8 +453,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
SmallPtrSet<BasicBlock*, 16> TranspBlocks;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false; // Without TargetData, assume the worst.
for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
// Check to see if the load is invalidated from the start of the block to
@@ -443,11 +460,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
LoadInst *Load = Loads[i];
BasicBlock *BB = Load->getParent();
- const PointerType *LoadTy =
- cast<PointerType>(Load->getPointerOperand()->getType());
- unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());
-
- if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
+ AliasAnalysis::Location Loc = AA.getLocation(Load);
+ if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
@@ -458,7 +472,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
I = idf_ext_begin(P, TranspBlocks),
E = idf_ext_end(P, TranspBlocks); I != E; ++I)
- if (AA.canBasicBlockModify(**I, Arg, LoadSize))
+ if (AA.canBasicBlockModify(**I, Loc))
return false;
}
}
@@ -694,6 +708,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// of the previous load.
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the TBAA info too.
+ newLoad->setMetadata(LLVMContext::MD_tbaa,
+ OrigLoad->getMetadata(LLVMContext::MD_tbaa));
Args.push_back(newLoad);
AA.copyValue(OrigLoad, Args.back());
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 64e8d79..a21efce 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumMerged, "Number of global constants merged");
namespace {
struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(ID) {}
+ ConstantMerge() : ModulePass(ID) {
+ initializeConstantMergePass(*PassRegistry::getPassRegistry());
+ }
// run - For this pass, process all of the globals in the module,
// eliminating duplicate constants.
@@ -44,7 +46,7 @@ namespace {
char ConstantMerge::ID = 0;
INITIALIZE_PASS(ConstantMerge, "constmerge",
- "Merge Duplicate Global Constants", false, false);
+ "Merge Duplicate Global Constants", false, false)
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
@@ -63,6 +65,18 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
UsedValues.insert(GV);
}
+// True if A is better than B.
+static bool IsBetterCannonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
+ if (!A.hasLocalLinkage() && B.hasLocalLinkage())
+ return true;
+
+ if (A.hasLocalLinkage() && !B.hasLocalLinkage())
+ return false;
+
+ return A.hasUnnamedAddr();
+}
+
bool ConstantMerge::runOnModule(Module &M) {
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -83,44 +97,76 @@ bool ConstantMerge::runOnModule(Module &M) {
// second level constants have initializers which point to the globals that
// were just merged.
while (1) {
- // First pass: identify all globals that can be merged together, filling in
- // the Replacements vector. We cannot do the replacement in this pass
- // because doing so may cause initializers of other globals to be rewritten,
- // invalidating the Constant* pointers in CMap.
- //
+
+ // First: Find the canonical constants others will be merged with.
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = GVI++;
-
+
// If this GV is dead, remove it.
GV->removeDeadConstantUsers();
if (GV->use_empty() && GV->hasLocalLinkage()) {
GV->eraseFromParent();
continue;
}
-
- // Only process constants with initializers in the default addres space.
- if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
// Don't touch values marked with attribute(used).
UsedGlobals.count(GV))
continue;
-
-
-
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
GlobalVariable *&Slot = CMap[Init];
- if (Slot == 0) { // Nope, add it to the map.
+ // If this is the first constant we find, or if the old one is local,
+ // replace it with the current one. If the current one is externally visible
+ // it cannot be replaced, but it can be the canonical constant we merge with.
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
Slot = GV;
- } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate!
- // Make all uses of the duplicate constant use the canonical version.
- Replacements.push_back(std::make_pair(GV, Slot));
}
}
+ // Second: identify all globals that can be merged together, filling in
+ // the Replacements vector. We cannot do the replacement in this pass
+ // because doing so may cause initializers of other globals to be rewritten,
+ // invalidating the Constant* pointers in CMap.
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ) {
+ GlobalVariable *GV = GVI++;
+
+ // Only process constants with initializers in the default address space.
+ if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
+ continue;
+
+ // We can only replace constants with local linkage.
+ if (!GV->hasLocalLinkage())
+ continue;
+
+ Constant *Init = GV->getInitializer();
+
+ // Check to see if the initializer is already known.
+ GlobalVariable *Slot = CMap[Init];
+
+ if (!Slot || Slot == GV)
+ continue;
+
+ if (!Slot->hasUnnamedAddr() && !GV->hasUnnamedAddr())
+ continue;
+
+ if (!GV->hasUnnamedAddr())
+ Slot->setUnnamedAddr(false);
+
+ // Make all uses of the duplicate constant use the canonical version.
+ Replacements.push_back(std::make_pair(GV, Slot));
+ }
+
if (Replacements.empty())
return MadeChange;
CMap.clear();
@@ -133,6 +179,8 @@ bool ConstantMerge::runOnModule(Module &M) {
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
// Delete the global value from the module.
+ assert(Replacements[i].first->hasLocalLinkage() &&
+ "Refusing to delete an externally visible global variable.");
Replacements[i].first->eraseFromParent();
}
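The restructured ConstantMerge loop above is a two-pass scheme: the first pass elects a canonical global per initializer (preferring externally visible and unnamed_addr definitions, per IsBetterCannonical), and the second pass redirects only locally linked duplicates, and only when at least one of the pair is unnamed_addr so the folded address cannot be observed. A simplified standalone model of that scheme follows; the types and names here are invented for illustration and are not the LLVM API:

    #include <cstddef>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    struct Global {
      std::string Init;      // stand-in for the constant initializer
      bool LocalLinkage;     // internal/private linkage
      bool UnnamedAddr;      // address identity is not significant
    };

    // Prefer externally visible globals, then unnamed_addr ones, as the
    // surviving copy.
    static bool isBetterCanonical(const Global &A, const Global &B) {
      if (!A.LocalLinkage && B.LocalLinkage) return true;
      if (A.LocalLinkage && !B.LocalLinkage) return false;
      return A.UnnamedAddr;
    }

    std::vector<std::pair<Global*, Global*> >
    planMerges(std::vector<Global> &Globals) {
      std::map<std::string, Global*> CMap;

      // Pass 1: elect a canonical global for each distinct initializer.
      for (std::size_t i = 0; i != Globals.size(); ++i) {
        Global *&Slot = CMap[Globals[i].Init];
        if (!Slot || isBetterCanonical(Globals[i], *Slot))
          Slot = &Globals[i];
      }

      // Pass 2: only locally linked duplicates may be redirected, and only
      // when at least one side is unnamed_addr, so nobody can observe that
      // the two addresses were folded together.
      std::vector<std::pair<Global*, Global*> > Replacements;
      for (std::size_t i = 0; i != Globals.size(); ++i) {
        Global &GV = Globals[i];
        if (!GV.LocalLinkage) continue;
        Global *Slot = CMap[GV.Init];
        if (!Slot || Slot == &GV) continue;
        if (!Slot->UnnamedAddr && !GV.UnnamedAddr) continue;
        if (!GV.UnnamedAddr) Slot->UnnamedAddr = false;
        Replacements.push_back(std::make_pair(&GV, Slot));
      }
      return Replacements;
    }

As in the hunk, the real second loop additionally skips globals outside address space 0, globals with sections, and anything recorded in the llvm.used set.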
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 47df235..b423221 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -39,7 +39,8 @@ using namespace llvm;
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
-
+STATISTIC(NumArgumentsReplacedWithUndef,
+ "Number of unread args replaced with undef");
namespace {
/// DAE - The dead argument elimination pass.
///
@@ -126,7 +127,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(ID) {}
+ DAE() : ModulePass(ID) {
+ initializeDAEPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -146,12 +149,13 @@ namespace {
void PropagateLiveness(const RetOrArg &RA);
bool RemoveDeadStuffFromFunction(Function *F);
bool DeleteDeadVarargs(Function &Fn);
+ bool RemoveDeadArgumentsFromCallers(Function &Fn);
};
}
char DAE::ID = 0;
-INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false)
namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -168,7 +172,7 @@ namespace {
char DAH::ID = 0;
INITIALIZE_PASS(DAH, "deadarghaX0r",
"Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
- false, false);
+ false, false)
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
@@ -285,6 +289,55 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
return true;
}
+/// RemoveDeadArgumentsFromCallers - Checks if the given function has any
+/// arguments that are unused, and changes the caller parameters to be undefined
+/// instead.
+bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
+{
+ if (Fn.isDeclaration())
+ return false;
+
+ // Functions with local linkage should already have been handled.
+ if (Fn.hasLocalLinkage())
+ return false;
+
+ if (Fn.use_empty())
+ return false;
+
+ llvm::SmallVector<unsigned, 8> UnusedArgs;
+ for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
+ I != E; ++I) {
+ Argument *Arg = I;
+
+ if (Arg->use_empty() && !Arg->hasByValAttr())
+ UnusedArgs.push_back(Arg->getArgNo());
+ }
+
+ if (UnusedArgs.empty())
+ return false;
+
+ bool Changed = false;
+
+ for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
+ I != E; ++I) {
+ CallSite CS(*I);
+ if (!CS || !CS.isCallee(I))
+ continue;
+
+ // Now go through all unused args and replace them with "undef".
+ for (unsigned I = 0, E = UnusedArgs.size(); I != E; ++I) {
+ unsigned ArgNo = UnusedArgs[I];
+
+ Value *Arg = CS.getArgument(ArgNo);
+ CS.setArgument(ArgNo, UndefValue::get(Arg->getType()));
+ ++NumArgumentsReplacedWithUndef;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
/// Convenience function that returns the number of return values. It returns 0
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
@@ -791,7 +844,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
} else if (New->getType()->isVoidTy()) {
// Our return value has uses, but they will get removed later on.
// Replace by null for now.
- Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
+ if (!Call->getType()->isX86_MMXTy())
+ Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
assert(RetTy->isStructTy() &&
"Return type changed, but not into a void. The old return type"
@@ -854,7 +908,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
} else {
// If this argument is dead, replace any uses of it with null constants
// (these are guaranteed to become unused later on).
- I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
+ if (!I->getType()->isX86_MMXTy())
+ I->replaceAllUsesWith(Constant::getNullValue(I->getType()));
}
// If we change the return value of the function we must rewrite any return
@@ -935,5 +990,14 @@ bool DAE::runOnModule(Module &M) {
Function *F = I++;
Changed |= RemoveDeadStuffFromFunction(F);
}
+
+ // Finally, look for any unused parameters in functions with non-local
+ // linkage and replace the passed in parameters with undef.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ Function& F = *I;
+
+ Changed |= RemoveDeadArgumentsFromCallers(F);
+ }
+
return Changed;
}
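For functions with external linkage DAE cannot change the signature, so the new RemoveDeadArgumentsFromCallers above instead rewrites the call sites it can see, passing undef in every position the callee never reads. A standalone sketch of the idea, with invented stand-in types rather than the LLVM API:

    #include <cstddef>
    #include <set>
    #include <vector>

    // Invented stand-ins: a call site is just its argument operands, and
    // Undef marks a value the callee is known never to read.
    struct Call { std::vector<int> Args; };
    static const int Undef = -1;

    // For each caller, overwrite every argument position the callee never
    // uses. byval arguments are excluded in the real pass because the
    // caller-side copy still has to be materialized.
    bool replaceDeadArgsInCallers(const std::set<unsigned> &UnusedArgs,
                                  std::vector<Call> &Callers) {
      bool Changed = false;
      for (std::size_t c = 0; c != Callers.size(); ++c)
        for (std::set<unsigned>::const_iterator I = UnusedArgs.begin(),
             E = UnusedArgs.end(); I != E; ++I)
          if (*I < Callers[c].Args.size()) {
            Callers[c].Args[*I] = Undef;
            Changed = true;
          }
      return Changed;
    }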
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
index 5dc50c5..a509931 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,9 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
namespace {
struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(ID) {}
+ DTE() : ModulePass(ID) {
+ initializeDTEPass(*PassRegistry::getPassRegistry());
+ }
// doPassInitialization - For this pass, it removes global symbol table
// entries for primitive types. These are never used for linking in GCC and
@@ -45,7 +47,10 @@ namespace {
}
char DTE::ID = 0;
-INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
+INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
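The same two registration changes repeat in every pass this diff touches: the constructor now calls the generated initialize<Name>Pass() hook, and the INITIALIZE_PASS* macros lose their trailing semicolon, with analysis dependencies spelled out between the _BEGIN and _END forms. A pattern outline on a hypothetical pass named Foo; the pass, its string names, and the forward declaration of initializeFooPass are illustrative, not code from this tree:

    #include "llvm/Pass.h"
    #include "llvm/Module.h"
    #include "llvm/InitializePasses.h"
    #include "llvm/Analysis/FindUsedTypes.h"
    using namespace llvm;

    // For a hypothetical out-of-tree pass; in-tree passes get this
    // declaration from llvm/InitializePasses.h instead.
    namespace llvm { void initializeFooPass(PassRegistry &); }

    namespace {
      struct Foo : public ModulePass {
        static char ID;
        Foo() : ModulePass(ID) {
          // New style: ask the registry to run the generated initializer.
          initializeFooPass(*PassRegistry::getPassRegistry());
        }
        virtual bool runOnModule(Module &M) { return false; }
        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<FindUsedTypes>();  // the dependency declared below
        }
      };
    }

    char Foo::ID = 0;
    // No trailing semicolon, and dependencies listed between _BEGIN and _END.
    INITIALIZE_PASS_BEGIN(Foo, "foo", "Hypothetical example pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
    INITIALIZE_PASS_END(Foo, "foo", "Hypothetical example pass", false, false)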
diff --git a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
index 45c5fe7..9d432de 100644
--- a/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -50,24 +50,22 @@ namespace {
// Visit the GlobalVariables.
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- if (I->hasLocalLinkage())
- I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
- if (deleteStuff == Named.count(I))
- I->setInitializer(0);
- }
+ I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->setInitializer(0);
+ }
// Visit the Functions.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) {
- if (I->hasLocalLinkage())
- I->setVisibility(GlobalValue::HiddenVisibility);
- I->setLinkage(GlobalValue::ExternalLinkage);
- if (deleteStuff == Named.count(I))
- I->deleteBody();
- }
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration())
+ I->deleteBody();
+ }
return true;
}
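The rewritten ExtractGV loops above now normalize linkage and visibility for declarations too, and only strip bodies or initializers from definitions that fall on the "delete" side of the Named set. A standalone model of the per-global rule, with invented types rather than the LLVM API:

    #include <set>
    #include <string>

    struct GVInfo {
      std::string Name;
      bool IsDeclaration;
      bool HasLocalLinkage;
      bool Hidden;
      bool External;
      bool HasBody;
    };

    void extractOne(GVInfo &G, const std::set<std::string> &Named,
                    bool deleteStuff) {
      if (G.HasLocalLinkage)
        G.Hidden = true;            // setVisibility(HiddenVisibility)
      G.External = true;            // setLinkage(ExternalLinkage)
      if (deleteStuff == (bool)Named.count(G.Name) && !G.IsDeclaration)
        G.HasBody = false;          // deleteBody() / setInitializer(0)
    }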
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 6165ba0..95decec 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -23,10 +23,10 @@
#include "llvm/CallGraphSCCPass.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
@@ -41,7 +41,9 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID) {}
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -61,67 +63,25 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
- bool PointsToLocalMemory(Value *V);
+ private:
+ AliasAnalysis *AA;
};
}
char FunctionAttrs::ID = 0;
-INITIALIZE_PASS(FunctionAttrs, "functionattrs",
- "Deduce function attributes", false, false);
+INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false)
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
-/// PointsToLocalMemory - Returns whether the given pointer value points to
-/// memory that is local to the function. Global constants are considered
-/// local to all functions.
-bool FunctionAttrs::PointsToLocalMemory(Value *V) {
- SmallVector<Value*, 16> Worklist;
- unsigned MaxLookup = 8;
-
- Worklist.push_back(V);
-
- do {
- V = Worklist.pop_back_val()->getUnderlyingObject();
-
- // An alloca instruction defines local memory.
- if (isa<AllocaInst>(V))
- continue;
-
- // A global constant counts as local memory for our purposes.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->isConstant())
- return false;
- continue;
- }
-
- // If both select values point to local memory, then so does the select.
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- Worklist.push_back(SI->getTrueValue());
- Worklist.push_back(SI->getFalseValue());
- continue;
- }
-
- // If all values incoming to a phi node point to local memory, then so does
- // the phi.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
- // Don't bother inspecting phi nodes with many operands.
- if (PN->getNumIncomingValues() > MaxLookup)
- return false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
- continue;
- }
-
- return false;
- } while (!Worklist.empty() && --MaxLookup);
-
- return Worklist.empty();
-}
-
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Function*, 8> SCCNodes;
@@ -141,14 +101,15 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// External node - may write memory. Just give up.
return false;
- if (F->doesNotAccessMemory())
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
+ if (MRB == AliasAnalysis::DoesNotAccessMemory)
// Already perfect!
continue;
// Definitions with weak linkage may be overridden at linktime with
// something that writes memory, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden()) {
- if (!F->onlyReadsMemory())
+ if (!AliasAnalysis::onlyReadsMemory(MRB))
// May write memory. Just give up.
return false;
@@ -163,32 +124,62 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
CallSite CS(cast<Value>(I));
- if (CS && CS.getCalledFunction()) {
+ if (CS) {
// Ignore calls to functions in the same SCC.
- if (SCCNodes.count(CS.getCalledFunction()))
+ if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
continue;
- // Ignore intrinsics that only access local memory.
- if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
- if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
- AliasAnalysis::AccessesArguments) {
- // Check that all pointer arguments point to local memory.
+ AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
+ // If the call doesn't access arbitrary memory, we may be able to
+ // figure out something.
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ // If the call does access argument pointees, check each argument.
+ if (AliasAnalysis::doesAccessArgPointees(MRB))
+ // Check whether all pointer arguments point to local memory, and
+ // ignore calls that only access local memory.
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
- if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
- // Writes memory. Just give up.
- return false;
+ if (Arg->getType()->isPointerTy()) {
+ AliasAnalysis::Location Loc(Arg,
+ AliasAnalysis::UnknownSize,
+ I->getMetadata(LLVMContext::MD_tbaa));
+ if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
+ if (MRB & AliasAnalysis::Mod)
+ // Writes non-local memory. Give up.
+ return false;
+ if (MRB & AliasAnalysis::Ref)
+ // Ok, it reads non-local memory.
+ ReadsMemory = true;
+ }
+ }
}
- // Only reads and writes local memory.
- continue;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore loads from local memory.
- if (PointsToLocalMemory(LI->getPointerOperand()))
continue;
+ }
+ // The call could access any memory. If that includes writes, give up.
+ if (MRB & AliasAnalysis::Mod)
+ return false;
+ // If it reads, note it.
+ if (MRB & AliasAnalysis::Ref)
+ ReadsMemory = true;
+ continue;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads from local memory.
+ if (!LI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(LI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Ignore stores to local memory.
- if (PointsToLocalMemory(SI->getPointerOperand()))
+ // Ignore non-volatile stores to local memory.
+ if (!SI->isVolatile()) {
+ AliasAnalysis::Location Loc = AA->getLocation(SI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
+ continue;
+ }
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ // Ignore vaargs on local memory.
+ AliasAnalysis::Location Loc = AA->getLocation(VI);
+ if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
@@ -198,10 +189,6 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Writes memory. Just give up.
return false;
- if (isMalloc(I))
- // malloc claims not to write memory! PR3754.
- return false;
-
// If this instruction may read memory, remember that.
ReadsMemory |= I->mayReadFromMemory();
}
@@ -384,6 +371,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
}
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+ AA = &getAnalysis<AliasAnalysis>();
+
bool Changed = AddReadAttrs(SCC);
Changed |= AddNoCaptureAttrs(SCC);
Changed |= AddNoAliasAttrs(SCC);
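The net effect of the FunctionAttrs rewrite above is that the pass no longer chases pointers itself: it asks AliasAnalysis for each call's mod/ref behavior and for whether each pointer operand refers to local or constant memory, then folds the answers into a readnone/readonly verdict for the SCC. A standalone model of that per-call decision, with invented names rather than the LLVM API:

    // Mod/Ref bits mirroring AliasAnalysis::Ref and AliasAnalysis::Mod.
    enum { Ref = 1, Mod = 2 };

    struct SCCState { bool GiveUp; bool ReadsMemory; };

    // MRB: the call's mod/ref behavior. ArgPointeesOnly: the call only
    // touches memory its pointer arguments point to. OnlyLocal: every such
    // pointer was proven local or constant, which is what
    // pointsToConstantMemory(Loc, /*OrLocal=*/true) answers in the real pass.
    void accountForCall(SCCState &S, unsigned MRB,
                        bool ArgPointeesOnly, bool OnlyLocal) {
      if (ArgPointeesOnly && OnlyLocal)
        return;                                   // effects stay in local memory
      if (MRB & Mod) { S.GiveUp = true; return; } // may write non-local memory
      if (MRB & Ref) S.ReadsMemory = true;        // readonly is still possible
    }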
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index aa18601..2b427aa 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,9 @@ STATISTIC(NumVariables, "Number of global variables removed");
namespace {
struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(ID) {}
+ GlobalDCE() : ModulePass(ID) {
+ initializeGlobalDCEPass(*PassRegistry::getPassRegistry());
+ }
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
@@ -52,7 +54,7 @@ namespace {
char GlobalDCE::ID = 0;
INITIALIZE_PASS(GlobalDCE, "globaldce",
- "Dead Global Elimination", false, false);
+ "Dead Global Elimination", false, false)
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index a77af54..d4cb712 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -40,6 +40,7 @@
using namespace llvm;
STATISTIC(NumMarked , "Number of globals marked constant");
+STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
@@ -55,11 +56,14 @@ STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
namespace {
+ struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(ID) {}
+ GlobalOpt() : ModulePass(ID) {
+ initializeGlobalOptPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
@@ -69,13 +73,16 @@ namespace {
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
- bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
+ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS);
};
}
char GlobalOpt::ID = 0;
INITIALIZE_PASS(GlobalOpt, "globalopt",
- "Global Variable Optimizer", false, false);
+ "Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -85,6 +92,9 @@ namespace {
/// about it. If we find out that the address of the global is taken, none of
/// this info will be accurate.
struct GlobalStatus {
+ /// isCompared - True if the global's address is used in a comparison.
+ bool isCompared;
+
/// isLoaded - True if the global is ever loaded. If the global isn't ever
/// loaded it can be deleted.
bool isLoaded;
@@ -129,10 +139,11 @@ struct GlobalStatus {
/// HasPHIUser - Set to true if this global has a user that is a PHI node.
bool HasPHIUser;
-
- GlobalStatus() : isLoaded(false), StoredType(NotStored), StoredOnceValue(0),
- AccessingFunction(0), HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), HasPHIUser(false) {}
+
+ GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ HasPHIUser(false) {}
};
}
@@ -165,6 +176,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
const User *U = *UI;
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType())) return true;
+
if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
} else if (const Instruction *I = dyn_cast<Instruction>(U)) {
if (!GS.HasMultipleAccessingFunctions) {
@@ -221,7 +237,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
GS.HasPHIUser = true;
} else if (isa<CmpInst>(I)) {
- // Nothing to analyse.
+ GS.isCompared = true;
} else if (isa<MemTransferInst>(I)) {
const MemTransferInst *MTI = cast<MemTransferInst>(I);
if (MTI->getArgOperand(0) == V)
@@ -308,7 +324,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit);
- } else if (CE->getOpcode() == Instruction::BitCast &&
+ } else if (CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, 0);
@@ -324,7 +340,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
+ ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -361,7 +377,7 @@ static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
return SafeToDestroyConstant(C);
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -371,15 +387,15 @@ static bool isSafeSROAElementUse(Value *V) {
// Stores *to* the pointer are ok.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;
-
+
// Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
if (GEPI == 0) return false;
-
+
if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
return false;
-
+
for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
I != E; ++I)
if (!isSafeSROAElementUse(*I))
@@ -393,11 +409,11 @@ static bool isSafeSROAElementUse(Value *V) {
///
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
- if (!isa<GetElementPtrInst>(U) &&
- (!isa<ConstantExpr>(U) ||
+ if (!isa<GetElementPtrInst>(U) &&
+ (!isa<ConstantExpr>(U) ||
cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;
-
+
// Check to see if this ConstantExpr GEP is SRA'able. In particular, we
// don't like < 3 operand CE's, and we don't like non-constant integer
// indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
@@ -409,18 +425,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
++GEPI; // Skip over the pointer index.
-
+
// If this is a use of an array allocation, do a bit more checking for sanity.
if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
uint64_t NumElements = AT->getNumElements();
ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
-
+
// Check to make sure that index falls within the array. If not,
// something funny is going on, so we won't do the optimization.
//
if (Idx->getZExtValue() >= NumElements)
return false;
-
+
// We cannot scalar repl this level of the array unless any array
// sub-indices are in-range constants. In particular, consider:
// A[0][i]. We cannot know that the user isn't doing invalid things like
@@ -441,7 +457,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
"Indexed GEP type is not array, vector, or struct!");
continue;
}
-
+
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
return false;
@@ -465,7 +481,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
}
return true;
}
-
+
/// SRAGlobal - Perform scalar replacement of aggregates on the specified global
/// variable. This opens the door for other optimizations by exposing the
@@ -476,7 +492,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
-
+
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
const Type *Ty = Init->getType();
@@ -488,7 +504,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
unsigned StartAlignment = GV->getAlignment();
if (StartAlignment == 0)
StartAlignment = TD.getABITypeAlignment(GV->getType());
-
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
@@ -503,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -522,7 +538,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return 0; // It's not worth it.
NewGlobals.reserve(NumElements);
-
+
uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
@@ -537,7 +553,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
GV->getType()->getAddressSpace());
Globals.insert(GV, NGV);
NewGlobals.push_back(NGV);
-
+
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
@@ -549,7 +565,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
-
+
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -615,7 +631,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
}
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
-/// value will trap if the value is dynamically null. PHIs keeps track of any
+/// value will trap if the value is dynamically null. PHIs keeps track of any
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSet<const PHINode*, 8> &PHIs) {
@@ -757,7 +773,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
// Keep track of whether we are able to remove all the uses of the global
// other than the store that defines it.
bool AllNonStoreUsesGone = true;
-
+
// Replace all uses of loads with uses of uses of the stored value.
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
@@ -830,7 +846,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
ConstantInt *NElements,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
-
+
const Type *GlobalType;
if (NElements->getZExtValue() == 1)
GlobalType = AllocTy;
@@ -840,14 +856,14 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// Create the new global variable. The contents of the malloc'd memory is
// undefined, so initialize with an undef value.
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
+ GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
GlobalType, false,
GlobalValue::InternalLinkage,
UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
GV->isThreadLocal());
-
+
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
// other users to use the global as well.
@@ -867,10 +883,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
User->replaceUsesOfWith(CI, TheBC);
}
}
-
+
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
- RepValue = ConstantExpr::getBitCast(RepValue,
+ RepValue = ConstantExpr::getBitCast(RepValue,
GV->getType()->getElementType());
// If there is a comparison against null, we will insert a global bool to
@@ -890,7 +906,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
SI->eraseFromParent();
continue;
}
-
+
LoadInst *LI = cast<LoadInst>(GV->use_back());
while (!LI->use_empty()) {
Use &LoadUse = LI->use_begin().getUse();
@@ -898,7 +914,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
LoadUse = RepValue;
continue;
}
-
+
ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
@@ -963,20 +979,20 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
continue; // Fine, ignore.
}
-
+
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
return false; // Storing the pointer itself... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
-
+
// Must index into the array and into the struct.
if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
return false;
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
// PHIs are ok if all uses are ok. Don't infinitely recurse through PHI
// cycles.
@@ -985,13 +1001,13 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
return false;
continue;
}
-
+
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
return false;
continue;
}
-
+
return false;
}
return true;
@@ -1000,9 +1016,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
/// somewhere. Transform all uses of the allocation into loads from the
/// global and uses of the resultant pointer. Further, delete the store into
-/// GV. This assumes that these value pass the
+/// GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
-static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
while (!Alloc->use_empty()) {
Instruction *U = cast<Instruction>(*Alloc->use_begin());
@@ -1035,7 +1051,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
continue;
}
}
-
+
// Insert a load from the global, and use it instead of the malloc.
Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
@@ -1053,24 +1069,24 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
++UI) {
const Instruction *User = cast<Instruction>(*UI);
-
+
// Comparison against null is ok.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
-
+
// getelementptr is also ok, but only a simple form.
if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
-
+
// Otherwise the GEP is ok.
continue;
}
-
+
if (const PHINode *PN = dyn_cast<PHINode>(User)) {
if (!LoadUsingPHIsPerLoad.insert(PN))
// This means some phi nodes are dependent on each other.
@@ -1079,19 +1095,19 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
if (!LoadUsingPHIs.insert(PN))
// If we have already analyzed this PHI, then it is safe.
continue;
-
+
// Make sure all uses of the PHI are simple enough to transform.
if (!LoadUsesSimpleEnoughForHeapSRA(PN,
LoadUsingPHIs, LoadUsingPHIsPerLoad))
return false;
-
+
continue;
}
-
+
// Otherwise we don't know what this is, not ok.
return false;
}
-
+
return true;
}
@@ -1110,10 +1126,10 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
return false;
LoadUsingPHIsPerLoad.clear();
}
-
+
// If we reach here, we know that all uses of the loads and transitive uses
// (through PHI nodes) are simple enough to transform. However, we don't know
- // that all inputs the to the PHI nodes are in the same equivalence sets.
+ // that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin()
@@ -1121,29 +1137,29 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
const PHINode *PN = *I;
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
-
+
// PHI of the stored value itself is ok.
if (InVal == StoredVal) continue;
-
+
if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
if (LoadUsingPHIs.count(InPN))
continue;
return false;
}
-
+
// Load from GV is ok.
if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
if (LI->getOperand(0) == GV)
continue;
-
+
// UNDEF? NULL?
-
+
// Anything else is rejected.
return false;
}
}
-
+
return true;
}
@@ -1151,15 +1167,15 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
-
+
if (FieldNo >= FieldVals.size())
FieldVals.resize(FieldNo+1);
-
+
// If we already have this value, just reuse the previously scalarized
// version.
if (Value *FieldVal = FieldVals[FieldNo])
return FieldVal;
-
+
// Depending on what instruction this is, we have several cases.
Value *Result;
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
@@ -1172,9 +1188,9 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- const StructType *ST =
+ const StructType *ST =
cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
-
+
Result =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
PN->getName()+".f"+Twine(FieldNo), PN);
@@ -1183,13 +1199,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
llvm_unreachable("Unknown usable value");
Result = 0;
}
-
+
return FieldVals[FieldNo] = Result;
}
/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
/// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(Instruction *LoadUser,
+static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
@@ -1199,30 +1215,30 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
InsertedScalarizedValues, PHIsToRewrite);
-
+
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
- Constant::getNullValue(NPtr->getType()),
+ Constant::getNullValue(NPtr->getType()),
SCI->getName());
SCI->replaceAllUsesWith(New);
SCI->eraseFromParent();
return;
}
-
+
// Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
&& "Unexpected GEPI!");
-
+
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
InsertedScalarizedValues, PHIsToRewrite);
-
+
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
-
+
Value *NGEPI = GetElementPtrInst::Create(NewPtr,
GEPIdx.begin(), GEPIdx.end(),
GEPI->getName(), GEPI);
@@ -1243,7 +1259,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
tie(InsertPos, Inserted) =
InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
if (!Inserted) return;
-
+
// If this is the first time we've seen this PHI, recursively process all
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
@@ -1256,7 +1272,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// is a value loaded from the global. Eliminate all uses of Ptr, making them
/// use FieldGlobals instead. All uses of loaded values satisfy
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
-static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
@@ -1264,7 +1280,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
-
+
if (Load->use_empty()) {
Load->eraseFromParent();
InsertedScalarizedValues.erase(Load);
@@ -1289,11 +1305,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// new mallocs at the same place as CI, and N globals.
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
-
+
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
const Type *FieldTy = STy->getElementType(FieldNo);
const PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
-
+
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
PFieldTy, false, GlobalValue::InternalLinkage,
@@ -1301,7 +1317,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getName() + ".f" + Twine(FieldNo), GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
-
+
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
@@ -1313,7 +1329,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
-
+
// The tricky aspect of this transformation is handling the case when malloc
// fails. In the original code, malloc failing would set the result pointer
// of malloc to null. In this case, some mallocs could succeed and others
@@ -1340,23 +1356,23 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Split the basic block at the old malloc.
BasicBlock *OrigBB = CI->getParent();
BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
-
+
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
"malloc_ret_null",
OrigBB->getParent());
-
+
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
// branch on RunningOr.
OrigBB->getTerminator()->eraseFromParent();
BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
-
+
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
- Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
+ Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1371,10 +1387,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
FreeBlock);
BranchInst::Create(NextBlock, FreeBlock);
-
+
NullPtrBlock = NextBlock;
}
-
+
BranchInst::Create(ContBB, NullPtrBlock);
// CI is no longer needed, remove it.
@@ -1385,25 +1401,25 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// inserted for a given load.
DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
InsertedScalarizedValues[GV] = FieldGlobals;
-
+
std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
-
+
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
-
+
// Must be a store of null.
StoreInst *SI = cast<StoreInst>(User);
assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
"Unexpected heap-sra user!");
-
+
// Insert a store of null into each global.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
@@ -1430,7 +1446,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
-
+
// Drop all inter-phi links and any loads that made it this far.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1440,7 +1456,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->dropAllReferences();
}
-
+
// Delete all the phis and loads now that inter-references are dead.
for (DenseMap<Value*, std::vector<Value*> >::iterator
I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
@@ -1450,7 +1466,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
LI->eraseFromParent();
}
-
+
// The old global is now dead, remove it.
GV->eraseFromParent();
@@ -1468,7 +1484,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
TargetData *TD) {
if (!TD)
return false;
-
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,7 +1524,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
return true;
}
-
+
// If the allocation is an array of structures, consider transforming this
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
@@ -1544,13 +1560,13 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = dyn_cast<BitCastInst>(Malloc) ?
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
-
+
GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
-
+
return false;
-}
+}
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
@@ -1568,7 +1584,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
- SOVC =
+ SOVC =
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
@@ -1576,7 +1592,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
- if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
GVI, TD))
return true;
}
@@ -1591,7 +1607,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// whenever it is used. This exposes the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
-
+
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
// between them is very expensive and unlikely to lead to later
@@ -1611,11 +1627,11 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
-
+
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
false,
- GlobalValue::InternalLinkage,
+ GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
@@ -1684,10 +1700,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
-bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
- Module::global_iterator &GVI) {
- SmallPtrSet<const PHINode*, 16> PHIUsers;
- GlobalStatus GS;
+bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI) {
+ if (!GV->hasLocalLinkage())
+ return false;
+
+ // Do more involved optimizations if the global is internal.
GV->removeDeadConstantUsers();
if (GV->use_empty()) {
@@ -1697,140 +1715,139 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
}
- if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
-#if 0
- DEBUG(dbgs() << "Global: " << *GV);
- DEBUG(dbgs() << " isLoaded = " << GS.isLoaded << "\n");
- DEBUG(dbgs() << " StoredType = ");
- switch (GS.StoredType) {
- case GlobalStatus::NotStored: DEBUG(dbgs() << "NEVER STORED\n"); break;
- case GlobalStatus::isInitializerStored: DEBUG(dbgs() << "INIT STORED\n");
- break;
- case GlobalStatus::isStoredOnce: DEBUG(dbgs() << "STORED ONCE\n"); break;
- case GlobalStatus::isStored: DEBUG(dbgs() << "stored\n"); break;
- }
- if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
- if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- DEBUG(dbgs() << " AccessingFunction = "
- << GS.AccessingFunction->getName() << "\n");
- DEBUG(dbgs() << " HasMultipleAccessingFunctions = "
- << GS.HasMultipleAccessingFunctions << "\n");
- DEBUG(dbgs() << " HasNonInstructionUser = "
- << GS.HasNonInstructionUser<<"\n");
- DEBUG(dbgs() << "\n");
-#endif
-
- // If this is a first class global and has only one accessing function
- // and this function is main (which we know is not recursive we can make
- // this global a local variable) we replace the global with a local alloca
- // in this function.
- //
- // NOTE: It doesn't make sense to promote non single-value types since we
- // are just replacing static memory to stack memory.
- //
- // If the global is in different address space, don't bring it to stack.
- if (!GS.HasMultipleAccessingFunctions &&
- GS.AccessingFunction && !GS.HasNonInstructionUser &&
- GV->getType()->getElementType()->isSingleValueType() &&
- GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage() &&
- GV->getType()->getAddressSpace() == 0) {
- DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
- Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
- ->getEntryBlock().begin());
- const Type* ElemTy = GV->getType()->getElementType();
- // FIXME: Pass Global's alignment when globals have alignment
- AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
- if (!isa<UndefValue>(GV->getInitializer()))
- new StoreInst(GV->getInitializer(), Alloca, &FirstI);
-
- GV->replaceAllUsesWith(Alloca);
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
+ GlobalStatus GS;
+
+ if (AnalyzeGlobal(GV, GS, PHIUsers))
+ return false;
+
+ if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ GV->setUnnamedAddr(true);
+ NumUnnamed++;
+ }
+
+ if (GV->isConstant() || !GV->hasInitializer())
+ return false;
+
+ return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+}
+
+/// ProcessInternalGlobal - Analyze the specified global variable and optimize
+/// it if possible. If we make a change, return true.
+bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
+ Module::global_iterator &GVI,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const GlobalStatus &GS) {
+ // If this is a first class global and has only one accessing function,
+ // and this function is main (which we know is not recursive), we can make
+ // this global a local variable: we replace the global with a local alloca
+ // in this function.
+ //
+ // NOTE: It doesn't make sense to promote non-single-value types since we
+ // are just replacing static memory with stack memory.
+ //
+ // If the global is in a different address space, don't bring it to the stack.
+ if (!GS.HasMultipleAccessingFunctions &&
+ GS.AccessingFunction && !GS.HasNonInstructionUser &&
+ GV->getType()->getElementType()->isSingleValueType() &&
+ GS.AccessingFunction->getName() == "main" &&
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
+ DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
+ const Type* ElemTy = GV->getType()->getElementType();
+ // FIXME: Pass Global's alignment when globals have alignment
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ if (!isa<UndefValue>(GV->getInitializer()))
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+ GV->replaceAllUsesWith(Alloca);
+ GV->eraseFromParent();
+ ++NumLocalized;
+ return true;
+ }
+
+ // If the global is never loaded (but may be stored to), it is dead.
+ // Delete it now.
+ if (!GS.isLoaded) {
+ DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+
+ // Delete any stores we can find to the global. We may not be able to
+ // make it completely dead though.
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ // If the global is dead now, delete it.
+ if (GV->use_empty()) {
GV->eraseFromParent();
- ++NumLocalized;
- return true;
+ ++NumDeleted;
+ Changed = true;
}
-
- // If the global is never loaded (but may be stored to), it is dead.
- // Delete it now.
- if (!GS.isLoaded) {
- DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
-
- // Delete any stores we can find to the global. We may not be able to
- // make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
- // If the global is dead now, delete it.
- if (GV->use_empty()) {
- GV->eraseFromParent();
- ++NumDeleted;
- Changed = true;
- }
- return Changed;
+ return Changed;
- } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
- GV->setConstant(true);
+ } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ GV->setConstant(true);
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
- // If the global is dead now, just nuke it.
- if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
- << "all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
+ // If the global is dead now, just nuke it.
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
+ << "all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ }
+
+ ++NumMarked;
+ return true;
+ } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+ if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
+ }
+ } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ // If the initial value for the global was an undef value, and if only
+ // one other value was stored into it, we can just change the
+ // initializer to be the stored value, then delete all stores to the
+ // global. This allows us to mark it constant.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (isa<UndefValue>(GV->getInitializer())) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
+
+ if (GV->use_empty()) {
+ DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
+ } else {
+ GVI = GV;
+ }
+ ++NumSubstitute;
+ return true;
}
- ++NumMarked;
+ // Try to optimize globals based on the knowledge that only one value
+ // (besides its initializer) is ever stored to the global.
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
+ getAnalysisIfAvailable<TargetData>()))
return true;
- } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
- } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
- // If the initial value for the global was an undef value, and if only
- // one other value was stored into it, we can just change the
- // initializer to be the stored value, then delete all stores to the
- // global. This allows us to mark it constant.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (isa<UndefValue>(GV->getInitializer())) {
- // Change the initial value here.
- GV->setInitializer(SOVConstant);
-
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
-
- if (GV->use_empty()) {
- DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
- } else {
- GVI = GV;
- }
- ++NumSubstitute;
- return true;
- }
- // Try to optimize globals based on the knowledge that only one value
- // (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>()))
+ // Otherwise, if the global was not a boolean, we can shrink it to be a
+ // boolean.
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
return true;
-
- // Otherwise, if the global was not a boolean, we can shrink it to be a
- // boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
- }
- }
+ }
}
+
return false;
}
@@ -1917,10 +1934,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (New && New != CE)
GV->setInitializer(New);
}
- // Do more involved optimizations if the global is internal.
- if (!GV->isConstant() && GV->hasLocalLinkage() &&
- GV->hasInitializer())
- Changed |= ProcessInternalGlobal(GV, GVI);
+
+ Changed |= ProcessGlobal(GV, GVI);
}
return Changed;
}
@@ -1928,46 +1943,47 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
 /// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
/// initializers have an init priority of 65535.
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (I->getName() == "llvm.global_ctors") {
- // Found it, verify it's an array of { int, void()* }.
- const ArrayType *ATy =dyn_cast<ArrayType>(I->getType()->getElementType());
- if (!ATy) return 0;
- const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
- if (!STy || STy->getNumElements() != 2 ||
- !STy->getElementType(0)->isIntegerTy(32)) return 0;
- const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
- if (!PFTy) return 0;
- const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
- if (!FTy || !FTy->getReturnType()->isVoidTy() ||
- FTy->isVarArg() || FTy->getNumParams() != 0)
- return 0;
-
- // Verify that the initializer is simple enough for us to handle.
- if (!I->hasDefinitiveInitializer()) return 0;
- ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
- if (!CA) return 0;
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (GV == 0) return 0;
+
+ // Found it, verify it's an array of { int, void()* }.
+ const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType());
+ if (!ATy) return 0;
+ const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ if (!STy || STy->getNumElements() != 2 ||
+ !STy->getElementType(0)->isIntegerTy(32)) return 0;
+ const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));
+ if (!PFTy) return 0;
+ const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType());
+ if (!FTy || !FTy->getReturnType()->isVoidTy() ||
+ FTy->isVarArg() || FTy->getNumParams() != 0)
+ return 0;
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return 0;
-
- // Init priority must be standard.
- ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!CI || CI->getZExtValue() != 65535)
- return 0;
- } else {
- return 0;
- }
-
- return I;
- }
- return 0;
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer()) return 0;
+
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA) return 0;
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(*i);
+ if (CS == 0) return 0;
+
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return 0;
+
+ // Init priority must be standard.
+ ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!CI || CI->getZExtValue() != 65535)
+ return 0;
+ }
+
+ return GV;
}
/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
@@ -1985,13 +2001,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
+static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
-
+
// Create the new init list.
std::vector<Constant*> CAList;
for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
@@ -2007,26 +2023,26 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
-
+
// Create the array initializer.
const Type *StructTy =
cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
+ Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
CAList.size()), CAList);
-
+
// If we didn't change the number of elements, don't create a new GV.
if (CA->getType() == GCL->getInitializer()->getType()) {
GCL->setInitializer(CA);
return GCL;
}
-
+
// Create the new global and insert it next to the existing list.
GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
NGV->takeName(GCL);
-
+
// Nuke the old list, replacing any uses with the new one.
if (!GCL->use_empty()) {
Constant *V = NGV;
@@ -2035,7 +2051,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
GCL->replaceAllUsesWith(V);
}
GCL->eraseFromParent();
-
+
if (Ctors.size())
return NGV;
else
@@ -2043,17 +2059,86 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
-static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
- Value *V) {
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
if (Constant *CV = dyn_cast<Constant>(V)) return CV;
Constant *R = ComputedValues[V];
assert(R && "Reference to an uncomputed value!");
return R;
}
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants);
+
+
+/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
+/// handled by the code generator. We don't want to generate something like:
+/// void *X = &X/42;
+/// because the code generator doesn't have a relocation that can handle that.
+///
+/// This function should be called if C was not found (but just got inserted)
+/// in SimpleConstants to avoid having to rescan the same constants all the
+/// time.
+static bool isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // Simple integer, undef, constant aggregate zero, global addresses, etc are
+ // all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
+ isa<GlobalValue>(C))
+ return true;
+
+ // Aggregate values are safe if all their elements are.
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Constant *Op = cast<Constant>(C->getOperand(i));
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ return false;
+ }
+ return true;
+ }
+
+ // We don't know exactly what relocations are allowed in constant expressions,
+ // so we allow &global+constantoffset, which is safe and uniformly supported
+ // across targets.
+ ConstantExpr *CE = cast<ConstantExpr>(C);
+ switch (CE->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // These casts are always fine if the casted value is.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ // GEP is fine if it is simple + constant offset.
+ case Instruction::GetElementPtr:
+ for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
+ if (!isa<ConstantInt>(CE->getOperand(i)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+
+ case Instruction::Add:
+ // We allow simple+cst.
+ if (!isa<ConstantInt>(CE->getOperand(1)))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ }
+ return false;
+}
+
+static inline bool
+isSimpleEnoughValueToCommit(Constant *C,
+ SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ // If we already checked this constant, we win.
+ if (!SimpleConstants.insert(C)) return true;
+ // Check the constant.
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+}
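
Editor's note: the insert-then-check idiom above makes SimpleConstants double as a memo table, so a constant shared by many initializers is only walked once. A minimal stand-alone sketch of that shape, using an invented Node type rather than LLVM's Constant hierarchy:

#include <unordered_set>
#include <vector>

// Toy stand-in for a constant: a leaf is trivially simple, an interior node
// is simple iff all of its operands are.  Purely illustrative, not LLVM IR.
struct Node {
  bool Leaf;
  std::vector<const Node*> Ops;
};

static bool isSimpleHelper(const Node *N, std::unordered_set<const Node*> &Seen);

// Insert first, recurse only on first sight -- the set is both the visited
// marker and the memo table, so shared subtrees are examined once.
static bool isSimple(const Node *N, std::unordered_set<const Node*> &Seen) {
  if (!Seen.insert(N).second)
    return true;                      // already examined (and accepted)
  return isSimpleHelper(N, Seen);
}

static bool isSimpleHelper(const Node *N, std::unordered_set<const Node*> &Seen) {
  if (N->Leaf)
    return true;                      // ints, undef, global addresses, ...
  for (const Node *Op : N->Ops)       // aggregates/exprs: check every operand
    if (!isSimple(Op, Seen))
      return false;
  return true;
}

int main() {
  Node A{true, {}}, B{true, {}};
  Node Agg{false, {&A, &B, &A}};      // A is shared; it is only visited once
  std::unordered_set<const Node*> Seen;
  return isSimple(&Agg, Seen) ? 0 : 1;
}

As in the pass, a node is inserted before it is judged; that is harmless here because a negative answer propagates straight up and, in the pass, makes the whole evaluation give up.
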
+
+
/// isSimpleEnoughPointerToCommit - Return true if this constant is simple
-/// enough for us to understand. In particular, if it is a cast of something,
-/// we punt. We basically just support direct accesses to globals and GEP's of
+/// enough for us to understand.  In particular, if it is a cast other than
+/// from one pointer type to another pointer type, we punt.
+/// We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
@@ -2062,19 +2147,19 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- return GV->hasDefinitiveInitializer();
+ return GV->hasUniqueInitializer();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
// Handle a constantexpr gep.
if (CE->getOpcode() == Instruction::GetElementPtr &&
isa<GlobalVariable>(CE->getOperand(0)) &&
cast<GEPOperator>(CE)->isInBounds()) {
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
- // Do not allow weak/linkonce/dllimport/dllexport linkage or
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
// external globals.
- if (!GV->hasDefinitiveInitializer())
+ if (!GV->hasUniqueInitializer())
return false;
// The first index must be zero.
@@ -2087,7 +2172,18 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
+
+ // A constantexpr bitcast from a pointer to another pointer is a no-op,
+ // and we know how to evaluate it by moving the bitcast from the pointer
+ // operand to the value operand.
+ } else if (CE->getOpcode() == Instruction::BitCast &&
+ isa<GlobalVariable>(CE->getOperand(0))) {
+ // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
+ // external globals.
+ return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
}
+ }
+
return false;
}
@@ -2101,7 +2197,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
assert(Val->getType() == Init->getType() && "Type mismatch!");
return Val;
}
-
+
std::vector<Constant*> Elts;
if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
@@ -2119,13 +2215,13 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
llvm_unreachable("This code is out of sync with "
" ConstantFoldLoadThroughGEPConstantExpr");
}
-
+
// Replace the element that we are supposed to.
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
-
+
// Return the modified struct.
return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
STy->isPacked());
@@ -2138,8 +2234,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
NumElts = ATy->getNumElements();
else
NumElts = cast<VectorType>(InitTy)->getNumElements();
-
-
+
+
// Break up the array into elements.
if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
@@ -2154,16 +2250,15 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
" ConstantFoldLoadThroughGEPConstantExpr");
Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
}
-
+
assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
-
+
if (Init->getType()->isArrayTy())
return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
- else
- return ConstantVector::get(&Elts[0], Elts.size());
- }
+ return ConstantVector::get(Elts);
+ }
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
@@ -2189,14 +2284,14 @@ static Constant *ComputeLoadResult(Constant *P,
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
if (I != Memory.end()) return I->second;
-
+
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
return 0;
}
-
+
// Handle a constantexpr getelementptr.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -2216,17 +2311,19 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs,
std::vector<Function*> &CallStack,
DenseMap<Constant*, Constant*> &MutatedMemory,
- std::vector<GlobalVariable*> &AllocaTmps) {
+ std::vector<GlobalVariable*> &AllocaTmps,
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
-
+
CallStack.push_back(F);
-
+
/// Values - As we compute SSA register values, we store their contents here.
DenseMap<Value*, Constant*> Values;
-
+
// Initialize arguments to the incoming values specified.
unsigned ArgNo = 0;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
@@ -2237,21 +2334,65 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// we can only evaluate any one basic block at most once. This set keeps
/// track of what we have executed so we can detect recursive cases etc.
SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
+
// CurInst - The current instruction we're evaluating.
BasicBlock::iterator CurInst = F->begin()->begin();
-
+
// This is the main evaluation loop.
while (1) {
Constant *InstResult = 0;
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
+
Constant *Val = getVal(Values, SI->getOperand(0));
+
+ // If this might be too difficult for the backend to handle (e.g. the addr
+ // of one global variable divided by another) then we can't commit it.
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // If we're evaluating a store through a bitcast, then we need
+ // to pull the bitcast off the pointer type and push it onto the
+ // stored value.
+ Ptr = CE->getOperand(0);
+
+ const Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+
+ // In order to push the bitcast onto the stored value, a bitcast
+ // from NewTy to Val's type must be legal. If it's not, we can try
+ // introspecting NewTy to find a legal conversion.
+ while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) {
+ // If NewTy is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ if (const StructType *STy = dyn_cast<StructType>(NewTy)) {
+ NewTy = STy->getTypeAtIndex(0U);
+
+ const IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant * const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList, 2);
+
+ // If we can't improve the situation by introspecting NewTy,
+ // we have to give up.
+ } else {
+ return false;
+ }
+ }
+
+ // If we found compatible types, go ahead and push the bitcast
+ // onto the stored value.
+ Val = ConstantExpr::getBitCast(Val, NewTy);
+ }
+
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
@@ -2290,7 +2431,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
- InstResult = AllocaTmps.back();
+ InstResult = AllocaTmps.back();
} else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
// Debug info can safely be ignored here.
@@ -2324,11 +2465,11 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else {
if (Callee->getFunctionType()->isVarArg())
return false;
-
+
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps))
+ MutatedMemory, AllocaTmps, SimpleConstants, TD))
return false;
InstResult = RetVal;
}
@@ -2342,7 +2483,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
if (!Cond) return false; // Cannot determine.
- NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ NewBB = BI->getSuccessor(!Cond->getZExtValue());
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
ConstantInt *Val =
@@ -2358,20 +2499,20 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
if (RI->getNumOperands())
RetVal = getVal(Values, RI->getOperand(0));
-
+
CallStack.pop_back(); // return from fn.
return true; // We succeeded at evaluating this ctor!
} else {
// invoke, unwind, unreachable.
return false; // Cannot handle this terminator.
}
-
+
// Okay, we succeeded in evaluating this control flow. See if we have
// executed the new block before. If so, we have a looping function,
// which we cannot evaluate in reasonable time.
if (!ExecutedBlocks.insert(NewBB))
return false; // looped!
-
+
// Okay, we have never been in this block before. Check to see if there
// are any PHI nodes. If so, evaluate them with information about where
// we came from.
@@ -2387,10 +2528,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// Did not know how to evaluate this!
return false;
}
-
- if (!CurInst->use_empty())
+
+ if (!CurInst->use_empty()) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
+ InstResult = ConstantFoldConstantExpression(CE, TD);
+
Values[CurInst] = InstResult;
-
+ }
+
// Advance program counter.
++CurInst;
}
@@ -2398,7 +2543,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
/// MutatedMemory - For each store we execute, we update this map. Loads
/// check this to get the most up-to-date value. If evaluation is successful,
/// this state is committed to the process.
@@ -2408,17 +2553,23 @@ static bool EvaluateStaticConstructor(Function *F) {
/// to represent its body. This vector is needed so we can delete the
/// temporary globals when we are done.
std::vector<GlobalVariable*> AllocaTmps;
-
+
/// CallStack - This is used to detect recursion. In pathological situations
/// we could hit exponential behavior, but at least there is nothing
/// unbounded.
std::vector<Function*> CallStack;
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
// Call the function.
Constant *RetValDummy;
bool EvalSuccess = EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>(), CallStack,
- MutatedMemory, AllocaTmps);
+ MutatedMemory, AllocaTmps,
+ SimpleConstants, TD);
+
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -2428,13 +2579,13 @@ static bool EvaluateStaticConstructor(Function *F) {
E = MutatedMemory.end(); I != E; ++I)
CommitValueTo(I->second, I->first);
}
-
+
// At this point, we are done interpreting. If we created any 'alloca'
// temporaries, release them now.
while (!AllocaTmps.empty()) {
GlobalVariable *Tmp = AllocaTmps.back();
AllocaTmps.pop_back();
-
+
// If there are still users of the alloca, the program is doing something
// silly, e.g. storing the address of the alloca somewhere and using it
// later. Since this is undefined, we'll just make it be null.
@@ -2442,7 +2593,7 @@ static bool EvaluateStaticConstructor(Function *F) {
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
delete Tmp;
}
-
+
return EvalSuccess;
}
@@ -2454,7 +2605,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
bool MadeChange = false;
if (Ctors.empty()) return false;
-
+
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2467,12 +2619,12 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
}
break;
}
-
+
// We cannot simplify external ctor functions.
if (F->empty()) continue;
-
+
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F)) {
+ if (EvaluateStaticConstructor(F, TD)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
@@ -2480,9 +2632,9 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
continue;
}
}
-
+
if (!MadeChange) return false;
-
+
GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
@@ -2546,21 +2698,21 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
-
+
 // Try to find the llvm.global_ctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
-
+
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M);
-
+
// Optimize global_ctors list.
if (GlobalCtors)
LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
-
+
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M);
@@ -2568,9 +2720,9 @@ bool GlobalOpt::runOnModule(Module &M) {
LocalChange |= OptimizeGlobalAliases(M);
Changed |= LocalChange;
}
-
+
// TODO: Move all global ctors functions to the end of the module for code
// layout.
-
+
return Changed;
}
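
Editor's note: the subtlest new piece in this file is the store-through-bitcast case in EvaluateFunction above, which peels leading struct members off the pointee type (conceptually a gep 0, 0) until the stored value can be losslessly bitcast to it. A toy sketch of that descent, with a made-up Ty record standing in for LLVM's type system:

#include <cstdio>

// Made-up type model: ScalarBits != 0 means a scalar of that width,
// otherwise FirstMember points at element 0 of a struct.  Not LLVM types.
struct Ty {
  int ScalarBits;
  const Ty *FirstMember;
};

// Descend through leading struct members until the pointee matches the type
// of the value being stored; give up if we run out of members to peel.
static const Ty *peelToMatch(const Ty *PointeeTy, const Ty *ValTy) {
  while (PointeeTy->ScalarBits != ValTy->ScalarBits) {
    if (!PointeeTy->FirstMember)
      return nullptr;                       // nothing left to introspect
    PointeeTy = PointeeTy->FirstMember;     // the "gep 0, 0" step
  }
  return PointeeTy;
}

int main() {
  Ty I32{32, nullptr};
  Ty Inner{0, &I32};                        // struct { i32, ... }
  Ty Outer{0, &Inner};                      // struct { struct { i32, ... }, ... }
  const Ty *M = peelToMatch(&Outer, &I32);
  std::printf("peeled to a %d-bit member\n", M ? M->ScalarBits : 0);
}

The real code then pushes the bitcast onto the stored value; arrays are left as a FIXME and still cause it to bail out.
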
diff --git a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 1b3cf78..c7c2939 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,9 @@ namespace {
///
struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(ID) {}
+ IPCP() : ModulePass(ID) {
+ initializeIPCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
private:
@@ -46,7 +48,7 @@ namespace {
char IPCP::ID = 0;
INITIALIZE_PASS(IPCP, "ipconstprop",
- "Interprocedural constant propagation", false, false);
+ "Interprocedural constant propagation", false, false)
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 340b70e..fbe90ce 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -7,17 +7,51 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMIPO.a, which implements
-// several transformations over the LLVM intermediate representation.
+// This file implements the common infrastructure (including C bindings) for
+// libLLVMIPO.a, which implements several transformations over the LLVM
+// intermediate representation.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/IPO.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
+void llvm::initializeIPO(PassRegistry &Registry) {
+ initializeArgPromotionPass(Registry);
+ initializeConstantMergePass(Registry);
+ initializeDAEPass(Registry);
+ initializeDAHPass(Registry);
+ initializeDTEPass(Registry);
+ initializeFunctionAttrsPass(Registry);
+ initializeGlobalDCEPass(Registry);
+ initializeGlobalOptPass(Registry);
+ initializeIPCPPass(Registry);
+ initializeAlwaysInlinerPass(Registry);
+ initializeSimpleInlinerPass(Registry);
+ initializeInternalizePassPass(Registry);
+ initializeLoopExtractorPass(Registry);
+ initializeBlockExtractorPassPass(Registry);
+ initializeSingleLoopExtractorPass(Registry);
+ initializeLowerSetJmpPass(Registry);
+ initializeMergeFunctionsPass(Registry);
+ initializePartialInlinerPass(Registry);
+ initializePruneEHPass(Registry);
+ initializeStripDeadPrototypesPassPass(Registry);
+ initializeStripSymbolsPass(Registry);
+ initializeStripDebugDeclarePass(Registry);
+ initializeStripDeadDebugInfoPass(Registry);
+ initializeStripNonDebugSymbolsPass(Registry);
+ initializeSRETPromotionPass(Registry);
+}
+
+void LLVMInitializeIPO(LLVMPassRegistryRef R) {
+ initializeIPO(*unwrap(R));
+}
+
void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createArgumentPromotionPass());
}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
index ecc60ad..ce795b7 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,9 @@ namespace {
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000) {}
+ AlwaysInliner() : Inliner(ID, -2000000000) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +63,11 @@ namespace {
}
char AlwaysInliner::ID = 0;
-INITIALIZE_PASS(AlwaysInliner, "always-inline",
- "Inliner for always_inline functions", false, false);
+INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false)
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 9c6637d..0c5b3be 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,12 @@ namespace {
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- SimpleInliner() : Inliner(ID) {}
- SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
+ SimpleInliner() : Inliner(ID) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +60,11 @@ namespace {
}
char SimpleInliner::ID = 0;
-INITIALIZE_PASS(SimpleInliner, "inline",
- "Function Integration/Inlining", false, false);
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false)
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 4983e8e..37eafd7 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -52,7 +52,8 @@ Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
Inliner::Inliner(char &ID, int Threshold)
- : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold) {}
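
Editor's note: the new constructor lets an InlineLimit value given explicitly on the command line win over the Threshold argument the creating pass passed in. A tiny sketch of that precedence, with an invented Flag type standing in for the cl::opt machinery:

#include <cstdio>

// Invented stand-in for a cl::opt-style option: it remembers whether the
// user actually passed it.  Not LLVM's CommandLine API.
struct Flag {
  int Value = 225;        // compiled-in default (illustrative number)
  int Occurrences = 0;    // times it appeared on the command line
  void set(int V) { Value = V; ++Occurrences; }
};

// Same precedence as the Inliner constructor above: an explicit command-line
// value overrides the threshold handed in by the creating pass.
static int pickThreshold(const Flag &InlineLimit, int CtorThreshold) {
  return InlineLimit.Occurrences > 0 ? InlineLimit.Value : CtorThreshold;
}

int main() {
  Flag F;
  std::printf("%d\n", pickThreshold(F, 75));   // 75: flag left at its default
  F.set(500);
  std::printf("%d\n", pickThreshold(F, 75));   // 500: explicit flag wins
}
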
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@@ -74,7 +75,8 @@ InlinedArrayAllocasTy;
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas) {
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -91,7 +93,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
!Caller->hasFnAttr(Attribute::StackProtectReq))
Caller->addFnAttr(Attribute::StackProtect);
-
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
// then we know that they have disjoint lifetimes and that we can merge them.
@@ -115,6 +116,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
//
SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
@@ -152,19 +168,21 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
// success!
- DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI);
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
AI->replaceAllUsesWith(AvailableAlloca);
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = 0;
break;
}
// If we already nuked the alloca, we're done with it.
if (MergedAwayAlloca)
continue;
-
+
// If we were unable to merge away the alloca either because there are no
// allocas of the right type available or because we reused them all
// already, remember that this alloca came from an inlined function and mark
@@ -234,20 +252,25 @@ bool Inliner::shouldInline(CallSite CS) {
if (Caller->hasLocalLinkage()) {
int TotalSecondaryCost = 0;
bool outerCallsFound = false;
- bool allOuterCallsWillBeInlined = true;
- bool someOuterCallWouldNotBeInlined = false;
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = true;
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
I != E; ++I) {
CallSite CS2(*I);
// If this isn't a call to Caller (it could be some other sort
- // of reference) skip it.
- if (!CS2 || CS2.getCalledFunction() != Caller)
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
continue;
+ }
InlineCost IC2 = getInlineCost(CS2);
if (IC2.isNever())
- allOuterCallsWillBeInlined = false;
+ callerWillBeRemoved = false;
if (IC2.isAlways() || IC2.isNever())
continue;
@@ -257,14 +280,14 @@ bool Inliner::shouldInline(CallSite CS) {
float FudgeFactor2 = getInlineFudgeFactor(CS2);
if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- allOuterCallsWillBeInlined = false;
+ callerWillBeRemoved = false;
// See if we have this case. We subtract off the penalty
// for the call instruction, which we would be deleting.
if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
(int)(CurrentThreshold2 * FudgeFactor2)) {
- someOuterCallWouldNotBeInlined = true;
+ inliningPreventsSomeOuterInline = true;
TotalSecondaryCost += Cost2;
}
}
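
Editor's note: to make the secondary-cost test concrete, here is a worked example with invented numbers (they are not LLVM's real thresholds or penalties): the inner inline costs 300, the single outer call site would cost 120 on its own but 394 once the inner body is folded in, so inlining here would block a cheaper outer inline and the pass declines.

#include <cstdio>

// Worked example of the secondary-cost check above.  All numbers are
// illustrative only.
int main() {
  int   Cost        = 300;    // cost of inlining this (inner) call site
  int   Cost2       = 120;    // cost of inlining the caller into *its* caller
  int   Threshold2  = 200;    // effective threshold at that outer call site
  float Fudge2      = 1.0f;   // fudge factor at that outer call site
  int   CallPenalty = 25;     // stand-in for the per-call penalty

  // The outer site is inlinable today (120 < 200) ...
  bool OuterOK = Cost2 < (int)(Threshold2 * Fudge2);
  // ... but not once this body is folded in (120 + 300 - 26 = 394 >= 200).
  bool OuterBlocked =
      Cost2 + Cost - (CallPenalty + 1) >= (int)(Threshold2 * Fudge2);

  int TotalSecondaryCost = (OuterOK && OuterBlocked) ? Cost2 : 0;   // 120

  // 120 < 300: the outer inline we would give up is cheaper than this one,
  // so the pass declines to inline here and keeps the outer opportunity.
  std::printf(TotalSecondaryCost < Cost
                  ? "skip: let the outer call site be inlined instead\n"
                  : "inline this call site\n");
}
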
@@ -272,10 +295,10 @@ bool Inliner::shouldInline(CallSite CS) {
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && someOuterCallWouldNotBeInlined &&
+ if (outerCallsFound && inliningPreventsSomeOuterInline &&
TotalSecondaryCost < Cost) {
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << Cost <<
@@ -401,7 +424,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
- // itself. If so, we'd be recursively inlinling the same function,
+ // itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
int InlineHistoryID = CallSites[CSi].second;
@@ -416,7 +439,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
continue;
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID))
continue;
++NumInlined;
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index a1d919f..9b9ebad 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -64,10 +64,11 @@ namespace {
char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
- "Internalize Global Symbols", false, false);
+ "Internalize Global Symbols", false, false)
InternalizePass::InternalizePass(bool AllButMain)
: ModulePass(ID), AllButMain(AllButMain){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
if (!APIList.empty()) // If a list is specified, use it as well.
@@ -76,6 +77,7 @@ InternalizePass::InternalizePass(bool AllButMain)
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
: ModulePass(ID), AllButMain(false){
+ initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
diff --git a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index f88dff6..848944d 100644
--- a/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,9 @@ namespace {
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(ID), NumLoops(numLoops) {}
+ : LoopPass(ID), NumLoops(numLoops) {
+ initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -50,8 +52,13 @@ namespace {
}
char LoopExtractor::ID = 0;
-INITIALIZE_PASS(LoopExtractor, "loop-extract",
- "Extract loops into new functions", false, false);
+INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false)
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -63,7 +70,7 @@ namespace {
char SingleLoopExtractor::ID = 0;
INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
- "Extract at most one loop into a new function", false, false);
+ "Extract at most one loop into a new function", false, false)
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
@@ -159,7 +166,7 @@ namespace {
char BlockExtractorPass::ID = 0;
INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
"Extract Basic Blocks From Module (for bugpoint use)",
- false, false);
+ false, false)
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
index 6c715de..b545f0b 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,9 @@ namespace {
bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(ID) {}
+ LowerSetJmp() : ModulePass(ID) {
+ initializeLowerSetJmpPass(*PassRegistry::getPassRegistry());
+ }
void visitCallInst(CallInst& CI);
void visitInvokeInst(InvokeInst& II);
@@ -122,7 +124,7 @@ namespace {
} // end anonymous namespace
char LowerSetJmp::ID = 0;
-INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false)
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 5d838f9..cccffca 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -67,42 +67,87 @@
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
+STATISTIC(NumThunksWritten, "Number of thunks generated");
+STATISTIC(NumAliasesWritten, "Number of aliases generated");
+STATISTIC(NumDoubleWeak, "Number of new functions created");
+
+/// Creates a hash-code for the function which is the same for any two
+/// functions that will compare equal, without looking at the instructions
+/// inside the function.
+static unsigned profileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
-namespace {
- /// MergeFunctions finds functions which will generate identical machine code,
- /// by considering all pointer types to be equivalent. Once identified,
- /// MergeFunctions will fold them by replacing a call to one to a call to a
- /// bitcast of the other.
- ///
- class MergeFunctions : public ModulePass {
- public:
- static char ID;
- MergeFunctions() : ModulePass(ID) {}
-
- bool runOnModule(Module &M);
-
- private:
- /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
- /// may be deleted, or may be converted into a thunk. In either case, it
- /// should never be visited again.
- void MergeTwoFunctions(Function *F, Function *G) const;
-
- /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
- /// replace direct uses of G with bitcast(F).
- void WriteThunk(Function *F, Function *G) const;
-
- TargetData *TD;
- };
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
}
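
Editor's note: the profile only folds in properties that the full comparator also treats as significant, so two mergeable functions always land in the same bucket. The same idea with the standard library instead of FoldingSetNodeID, over an invented Sig record (none of this is LLVM API):

#include <cstdio>
#include <cstddef>
#include <functional>
#include <vector>

// Invented record holding only the coarse features the profile above reads.
struct Sig {
  std::size_t NumBlocks;
  unsigned CallingConv;
  bool HasGC, IsVarArg;
  int RetTypeID;
  std::vector<int> ParamTypeIDs;
};

// Order-dependent hash combine.  Only features the comparator also checks may
// feed the hash, otherwise two mergeable functions could hash differently.
static std::size_t combine(std::size_t Seed, std::size_t V) {
  return Seed ^ (std::hash<std::size_t>()(V) + 0x9e3779b9u + (Seed << 6) + (Seed >> 2));
}

static std::size_t profile(const Sig &S) {
  std::size_t H = combine(0, S.NumBlocks);
  H = combine(H, S.CallingConv);
  H = combine(H, S.HasGC);
  H = combine(H, S.IsVarArg);
  H = combine(H, (std::size_t)S.RetTypeID);
  for (int T : S.ParamTypeIDs)
    H = combine(H, (std::size_t)T);
  return H;
}

int main() {
  Sig A{3, 0, false, false, 7, {11, 11}};
  Sig B{3, 0, false, false, 7, {11, 11}};
  std::printf("equal profiles hash equally: %d\n", profile(A) == profile(B));
}
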
-char MergeFunctions::ID = 0;
-INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
+namespace {
+
+/// ComparableFunction - A struct that pairs together functions with a
+/// TargetData so that we can keep them together as elements in the DenseSet.
+class ComparableFunction {
+public:
+ static const ComparableFunction EmptyKey;
+ static const ComparableFunction TombstoneKey;
+ static TargetData * const LookupOnly;
+
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+
+ Function *getFunc() const { return Func; }
+ unsigned getHash() const { return Hash; }
+ TargetData *getTD() const { return TD; }
+
+ // Drops AssertingVH reference to the function. Outside of debug mode, this
+ // does nothing.
+ void release() {
+ assert(Func &&
+ "Attempted to release function twice, or release empty/tombstone!");
+ Func = NULL;
+ }
+
+private:
+ explicit ComparableFunction(unsigned Hash)
+ : Func(NULL), Hash(Hash), TD(NULL) {}
+
+ AssertingVH<Function> Func;
+ unsigned Hash;
+ TargetData *TD;
+};
+
+const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
+const ComparableFunction ComparableFunction::TombstoneKey =
+ ComparableFunction(1);
+TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1);
-ModulePass *llvm::createMergeFunctionsPass() {
- return new MergeFunctions();
+}
+
+namespace llvm {
+ template <>
+ struct DenseMapInfo<ComparableFunction> {
+ static ComparableFunction getEmptyKey() {
+ return ComparableFunction::EmptyKey;
+ }
+ static ComparableFunction getTombstoneKey() {
+ return ComparableFunction::TombstoneKey;
+ }
+ static unsigned getHashValue(const ComparableFunction &CF) {
+ return CF.getHash();
+ }
+ static bool isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS);
+ };
}
namespace {
+
/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. TargetData is
/// used if available. The comparator always fails conservatively (erring on the
@@ -111,34 +156,34 @@ class FunctionComparator {
public:
FunctionComparator(const TargetData *TD, const Function *F1,
const Function *F2)
- : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+ : F1(F1), F2(F2), TD(TD) {}
- /// Compare - test whether the two functions have equivalent behaviour.
- bool Compare();
+ /// Test whether the two functions have equivalent behaviour.
+ bool compare();
private:
- /// Compare - test whether two basic blocks have equivalent behaviour.
- bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+ /// Test whether two basic blocks have equivalent behaviour.
+ bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
- /// Enumerate - Assign or look up previously assigned numbers for the two
- /// values, and return whether the numbers are equal. Numbers are assigned in
- /// the order visited.
- bool Enumerate(const Value *V1, const Value *V2);
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ bool enumerate(const Value *V1, const Value *V2);
- /// isEquivalentOperation - Compare two Instructions for equivalence, similar
- /// to Instruction::isSameOperationAs but with modifications to the type
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs but with modifications to the type
/// comparison.
bool isEquivalentOperation(const Instruction *I1,
const Instruction *I2) const;
- /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ /// Compare two GEPs for equivalent pointer arithmetic.
bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
bool isEquivalentGEP(const GetElementPtrInst *GEP1,
const GetElementPtrInst *GEP2) {
return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
}
- /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ /// Compare two Types, treating all pointer types as equal.
bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
// The two functions undergoing comparison.
@@ -146,20 +191,26 @@ private:
const TargetData *TD;
- typedef DenseMap<const Value *, unsigned long> IDMap;
- IDMap Map1, Map2;
- unsigned long IDMap1Count, IDMap2Count;
+ DenseMap<const Value *, const Value *> id_map;
+ DenseSet<const Value *> seen_values;
};
+
}
-/// isEquivalentType - any two pointers in the same address space are
-/// equivalent. Otherwise, standard type equivalence rules apply.
+// Any two pointers in the same address space are equivalent, intptr_t and
+// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(const Type *Ty1,
const Type *Ty2) const {
if (Ty1 == Ty2)
return true;
- if (Ty1->getTypeID() != Ty2->getTypeID())
+ if (Ty1->getTypeID() != Ty2->getTypeID()) {
+ if (TD) {
+ LLVMContext &Ctx = Ty1->getContext();
+ if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
+ if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
+ }
return false;
+ }
switch(Ty1->getTypeID()) {
default:
@@ -167,6 +218,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
// Fall through in Release mode.
case Type::IntegerTyID:
case Type::OpaqueTyID:
+ case Type::VectorTyID:
// Ty1 == Ty2 would have returned true earlier.
return false;
@@ -225,21 +277,18 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
return ATy1->getNumElements() == ATy2->getNumElements() &&
isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
}
-
- case Type::VectorTyID: {
- const VectorType *VTy1 = cast<VectorType>(Ty1);
- const VectorType *VTy2 = cast<VectorType>(Ty2);
- return VTy1->getNumElements() == VTy2->getNumElements() &&
- isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
- }
}
}
-/// isEquivalentOperation - determine whether the two operations are the same
-/// except that pointer-to-A and pointer-to-B are equivalent. This should be
-/// kept in sync with Instruction::isSameOperationAs.
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
const Instruction *I2) const {
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to isEquivalentType.
+ // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
+ // * because of the above, we don't test for the tail bit on calls later on
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()) ||
@@ -263,14 +312,11 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
if (const CallInst *CI = dyn_cast<CallInst>(I1))
- return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<CallInst>(I2)->getAttributes().getRawPointer();
+ return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<InvokeInst>(I2)->getAttributes().getRawPointer();
+ CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) {
if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices())
return false;
@@ -291,8 +337,7 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
return true;
}
-/// isEquivalentGEP - determine whether two GEP operations perform the same
-/// underlying arithmetic.
+// Determine whether two GEP operations perform the same underlying arithmetic.
bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
// When we have target data, we can reduce the GEP down to the value in bytes
@@ -315,17 +360,17 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
return false;
for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
+ if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
return false;
}
return true;
}
-/// Enumerate - Compare two values used by the two functions under pair-wise
-/// comparison. If this is the first time the values are seen, they're added to
-/// the mapping so that we will detect mismatches on next use.
-bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+// Compare two values used by the two functions under pair-wise comparison. If
+// this is the first time the values are seen, they're added to the mapping so
+// that we will detect mismatches on next use.
+bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
// Check for function @f1 referring to itself and function @f2 referring to
// itself, or referring to each other, or both referring to either of them.
// They're all equivalent if the two functions are otherwise equivalent.
@@ -334,35 +379,44 @@ bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
if (V1 == F2 && V2 == F1)
return true;
- // TODO: constant expressions with GEP or references to F1 or F2.
- if (isa<Constant>(V1))
- return V1 == V2;
-
- if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
- const InlineAsm *IA1 = cast<InlineAsm>(V1);
- const InlineAsm *IA2 = cast<InlineAsm>(V2);
- return IA1->getAsmString() == IA2->getAsmString() &&
- IA1->getConstraintString() == IA2->getConstraintString();
+ if (const Constant *C1 = dyn_cast<Constant>(V1)) {
+ if (V1 == V2) return true;
+ const Constant *C2 = dyn_cast<Constant>(V2);
+ if (!C2) return false;
+ // TODO: constant expressions with GEP or references to F1 or F2.
+ if (C1->isNullValue() && C2->isNullValue() &&
+ isEquivalentType(C1->getType(), C2->getType()))
+ return true;
+ // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
+ // then they must have equal bit patterns.
+ return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
+ C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
}
- unsigned long &ID1 = Map1[V1];
- if (!ID1)
- ID1 = ++IDMap1Count;
+ if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
+ return V1 == V2;
- unsigned long &ID2 = Map2[V2];
- if (!ID2)
- ID2 = ++IDMap2Count;
+ // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
+ // check whether it's equal to V2. When there is no mapping then we need to
+ // ensure that V2 isn't already equivalent to something else. For this
+ // purpose, we track the V2 values in a set.
- return ID1 == ID2;
+ const Value *&map_elem = id_map[V1];
+ if (map_elem)
+ return map_elem == V2;
+ if (!seen_values.insert(V2).second)
+ return false;
+ map_elem = V2;
+ return true;
}
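
Editor's note: the rewritten enumerate replaces the old pair of counters with an explicit one-to-one correspondence: each left-hand value maps to exactly one right-hand value, and no right-hand value may be claimed twice. A small stand-alone sketch of that discipline, with strings standing in for the Value pointers:

#include <cassert>
#include <string>
#include <unordered_map>
#include <unordered_set>

// id_map / seen_values analogue from the comparator above, illustration only.
struct Pairing {
  std::unordered_map<std::string, std::string> Map;   // L -> R pairings
  std::unordered_set<std::string> Claimed;            // R values already taken

  bool pair(const std::string &L, const std::string &R) {
    auto It = Map.find(L);
    if (It != Map.end())
      return It->second == R;     // L already paired: must be with this R
    if (!Claimed.insert(R).second)
      return false;               // R already belongs to some other L
    Map.emplace(L, R);            // first meeting: record the pairing
    return true;
  }
};

int main() {
  Pairing P;
  assert(P.pair("%a", "%x"));     // new pairing
  assert(P.pair("%a", "%x"));     // consistent reuse
  assert(!P.pair("%b", "%x"));    // %x is already taken by %a
  return 0;
}
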
-/// Compare - test whether two basic blocks have equivalent behaviour.
-bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+// Test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
do {
- if (!Enumerate(F1I, F2I))
+ if (!enumerate(F1I, F2I))
return false;
if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
@@ -370,7 +424,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
if (!GEP2)
return false;
- if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
if (!isEquivalentGEP(GEP1, GEP2))
@@ -384,7 +438,7 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
Value *OpF1 = F1I->getOperand(i);
Value *OpF2 = F2I->getOperand(i);
- if (!Enumerate(OpF1, OpF2))
+ if (!enumerate(OpF1, OpF2))
return false;
if (OpF1->getValueID() != OpF2->getValueID() ||
@@ -399,8 +453,8 @@ bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
return F1I == F1E && F2I == F2E;
}
-/// Compare - test whether the two functions have equivalent behaviour.
-bool FunctionComparator::Compare() {
+// Test whether the two functions have equivalent behaviour.
+bool FunctionComparator::compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
@@ -431,14 +485,14 @@ bool FunctionComparator::Compare() {
return false;
assert(F1->arg_size() == F2->arg_size() &&
- "Identical functions have a different number of args.");
+ "Identically typed functions have different numbers of args!");
// Visit the arguments so that they get enumerated in the order they're
// passed in.
for (Function::const_arg_iterator f1i = F1->arg_begin(),
f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
- if (!Enumerate(f1i, f2i))
- llvm_unreachable("Arguments repeat");
+ if (!enumerate(f1i, f2i))
+ llvm_unreachable("Arguments repeat!");
}
// We do a CFG-ordered walk since the actual ordering of the blocks in the
@@ -456,7 +510,7 @@ bool FunctionComparator::Compare() {
const BasicBlock *F1BB = F1BBs.pop_back_val();
const BasicBlock *F2BB = F2BBs.pop_back_val();
- if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
return false;
const TerminatorInst *F1TI = F1BB->getTerminator();
@@ -474,23 +528,190 @@ bool FunctionComparator::Compare() {
return true;
}
-/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
-/// direct uses of G with bitcast(F).
-void MergeFunctions::WriteThunk(Function *F, Function *G) const {
+namespace {
+
+/// MergeFunctions finds functions which will generate identical machine code,
+/// by considering all pointer types to be equivalent. Once identified,
+/// MergeFunctions will fold them by replacing a call to one to a call to a
+/// bitcast of the other.
+///
+class MergeFunctions : public ModulePass {
+public:
+ static char ID;
+ MergeFunctions()
+ : ModulePass(ID), HasGlobalAliases(false) {
+ initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M);
+
+private:
+ typedef DenseSet<ComparableFunction> FnSetType;
+
+ /// A work queue of functions that may have been modified and should be
+ /// analyzed again.
+ std::vector<WeakVH> Deferred;
+
+ /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// equal to one that's already present.
+ bool insert(ComparableFunction &NewF);
+
+ /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// analysis.
+ void remove(Function *F);
+
+ /// Find the functions that use this Value and remove them from FnSet and
+ /// queue the functions.
+ void removeUsers(Value *V);
+
+ /// Replace all direct calls of Old with calls of New. Will bitcast New if
+ /// necessary to make types match.
+ void replaceDirectCallers(Function *Old, Function *New);
+
+ /// Merge two equivalent functions. Upon completion, G may be deleted, or may
+ /// be converted into a thunk. In either case, it should never be visited
+ /// again.
+ void mergeTwoFunctions(Function *F, Function *G);
+
+ /// Replace G with a thunk or an alias to F. Deletes G.
+ void writeThunkOrAlias(Function *F, Function *G);
+
+ /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+ /// of G with bitcast(F). Deletes G.
+ void writeThunk(Function *F, Function *G);
+
+ /// Replace G with an alias to F. Deletes G.
+ void writeAlias(Function *F, Function *G);
+
+ /// The set of all distinct functions. Use the insert() and remove() methods
+ /// to modify it.
+ FnSetType FnSet;
+
+ /// TargetData for more accurate GEP comparisons. May be NULL.
+ TargetData *TD;
+
+ /// Whether or not the target supports global aliases.
+ bool HasGlobalAliases;
+};
+
+} // end anonymous namespace
+
+char MergeFunctions::ID = 0;
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)
+
+ModulePass *llvm::createMergeFunctionsPass() {
+ return new MergeFunctions();
+}
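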
+
+bool MergeFunctions::runOnModule(Module &M) {
+ bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
+ Deferred.push_back(WeakVH(I));
+ }
+ FnSet.resize(Deferred.size());
+
+ do {
+ std::vector<WeakVH> Worklist;
+ Deferred.swap(Worklist);
+
+ DEBUG(dbgs() << "size of module: " << M.size() << '\n');
+ DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
+
+ // Insert only strong functions and merge them. Strong function merging
+ // always deletes one of them.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ !F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+
+ // Insert only weak functions and merge them. By doing these second we
+ // create thunks to the strong function when possible. When two weak
+ // functions are identical, we create a new strong function with two weak
+ // thunks to it which are identical but not mergeable.
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(),
+ E = Worklist.end(); I != E; ++I) {
+ if (!*I) continue;
+ Function *F = cast<Function>(*I);
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
+ F->mayBeOverridden()) {
+ ComparableFunction CF = ComparableFunction(F, TD);
+ Changed |= insert(CF);
+ }
+ }
+ DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ } while (!Deferred.empty());
+
+ FnSet.clear();
+
+ return Changed;
+}
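
The driver loop above repeatedly swaps the Deferred list into a local worklist and runs another round whenever something was re-queued, so merging converges to a fixed point. A compact sketch of that pattern, with integers standing in for functions (illustrative only, not the pass itself):

#include <iostream>
#include <vector>

int main() {
  std::vector<int> deferred = {1, 2, 3};
  int rounds = 0;
  do {
    std::vector<int> worklist;
    deferred.swap(worklist);          // Take the current batch.
    for (int item : worklist) {
      // "Process" the item; re-queue one item once to force a second round.
      if (rounds == 0 && item == 2)
        deferred.push_back(item);
    }
    ++rounds;
  } while (!deferred.empty());        // Stop when nothing was re-queued.
  std::cout << "converged after " << rounds << " rounds\n";
  return 0;
}
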
+
+bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
+ const ComparableFunction &RHS) {
+ if (LHS.getFunc() == RHS.getFunc() &&
+ LHS.getHash() == RHS.getHash())
+ return true;
+ if (!LHS.getFunc() || !RHS.getFunc())
+ return false;
+
+ // One of these is a special "underlying pointer comparison only" object.
+ if (LHS.getTD() == ComparableFunction::LookupOnly ||
+ RHS.getTD() == ComparableFunction::LookupOnly)
+ return false;
+
+ assert(LHS.getTD() == RHS.getTD() &&
+ "Comparing functions for different targets");
+
+ return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ RHS.getFunc()).compare();
+}
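
isEqual() tries the cheap checks first (same function pointer, matching hash) and only falls back to the full FunctionComparator walk on a genuine candidate. The same layering can be sketched with a hash set whose hash is deliberately cheap and whose equality predicate does the expensive comparison (the types and names below are invented for illustration):

#include <cassert>
#include <string>
#include <unordered_set>

struct Item {
  std::string body;                       // Stand-in for a function body.
};

struct CheapHash {
  size_t operator()(const Item &I) const {
    return I.body.size();                 // Cheap, collision-prone hash.
  }
};

struct DeepEqual {
  bool operator()(const Item &A, const Item &B) const {
    return A.body == B.body;              // Expensive full comparison.
  }
};

int main() {
  std::unordered_set<Item, CheapHash, DeepEqual> Set;
  assert(Set.insert({"abc"}).second);     // Unique: inserted.
  assert(!Set.insert({"abc"}).second);    // Duplicate: merged away.
  assert(Set.insert({"xyz"}).second);     // Same hash, different body.
  return 0;
}
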
+
+// Replace direct callers of Old with New.
+void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
+ Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
+ for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter)) {
+ remove(CS.getInstruction()->getParent()->getParent());
+ TheIter.getUse().set(BitcastNew);
+ }
+ }
+}
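
replaceDirectCallers() advances the use iterator before rewriting the use it points at, so the mutation cannot invalidate the current position. The general iterate-then-mutate pattern, shown here on a std::list rather than an LLVM use list (a sketch, not the actual API):

#include <cassert>
#include <list>

int main() {
  std::list<int> uses = {1, 2, 3, 4};
  // Advance the iterator before mutating the element it points at, so the
  // mutation (here: erasing even entries) cannot invalidate our position.
  for (auto it = uses.begin(), end = uses.end(); it != end;) {
    auto cur = it++;
    if (*cur % 2 == 0)
      uses.erase(cur);
  }
  assert(uses == (std::list<int>{1, 3}));
  return 0;
}
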
+
+// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
+void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
+ if (HasGlobalAliases && G->hasUnnamedAddr()) {
+ if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
+ G->hasWeakLinkage()) {
+ writeAlias(F, G);
+ return;
+ }
+ }
+
+ writeThunk(F, G);
+}
+
+// Replace G with a simple tail call to bitcast(F). Also replace direct uses
+// of G with bitcast(F). Deletes G.
+void MergeFunctions::writeThunk(Function *F, Function *G) {
if (!G->mayBeOverridden()) {
// Redirect direct callers of G to F.
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
- for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
- UI != UE;) {
- Value::use_iterator TheIter = UI;
- ++UI;
- CallSite CS(*TheIter);
- if (CS && CS.isCallee(TheIter))
- TheIter.getUse().set(BitcastF);
- }
+ replaceDirectCallers(G, F);
}
- // If G was internal then we may have replaced all uses if G with F. If so,
+ // If G was internal then we may have replaced all uses of G with F. If so,
// stop here and delete G. There's no need for a thunk.
if (G->hasLocalLinkage() && G->use_empty()) {
G->eraseFromParent();
@@ -522,131 +743,126 @@ void MergeFunctions::WriteThunk(Function *F, Function *G) const {
NewG->copyAttributesFrom(G);
NewG->takeName(G);
+ removeUsers(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ ++NumThunksWritten;
}
-/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
-/// Function G is deleted.
-void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
- if (F->isWeakForLinker()) {
- assert(G->isWeakForLinker());
+// Replace G with an alias to F and delete G.
+void MergeFunctions::writeAlias(Function *F, Function *G) {
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
+ BitcastF, G->getParent());
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ GA->takeName(G);
+ GA->setVisibility(G->getVisibility());
+ removeUsers(G);
+ G->replaceAllUsesWith(GA);
+ G->eraseFromParent();
+
+ DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
+ ++NumAliasesWritten;
+}
+
+// Merge two equivalent functions. Upon completion, Function G is deleted.
+void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
+ if (F->mayBeOverridden()) {
+ assert(G->mayBeOverridden());
+
+ if (HasGlobalAliases) {
+ // Make them both thunks to the same internal function.
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ removeUsers(F);
+ F->replaceAllUsesWith(H);
- // Make them both thunks to the same internal function.
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
- F->replaceAllUsesWith(H);
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
+ writeAlias(F, G);
+ writeAlias(F, H);
- WriteThunk(F, G);
- WriteThunk(F, H);
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ } else {
+ // We can't merge them. Instead, pick one and update all direct callers
+ // to call it and hope that we improve the instruction cache hit rate.
+ replaceDirectCallers(G, F);
+ }
- F->setAlignment(MaxAlignment);
- F->setLinkage(GlobalValue::InternalLinkage);
+ ++NumDoubleWeak;
} else {
- WriteThunk(F, G);
+ writeThunkOrAlias(F, G);
}
++NumFunctionsMerged;
}
-static unsigned ProfileFunction(const Function *F) {
- const FunctionType *FTy = F->getFunctionType();
-
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
- return ID.ComputeHash();
-}
-
-class ComparableFunction {
-public:
- ComparableFunction(Function *Func, TargetData *TD)
- : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
+// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// that was already inserted.
+bool MergeFunctions::insert(ComparableFunction &NewF) {
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (Result.second) {
+ DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ return false;
+ }
- AssertingVH<Function> const Func;
- const unsigned Hash;
- TargetData * const TD;
-};
+ const ComparableFunction &OldF = *Result.first;
-struct MergeFunctionsEqualityInfo {
- static ComparableFunction *getEmptyKey() {
- return reinterpret_cast<ComparableFunction*>(0);
- }
- static ComparableFunction *getTombstoneKey() {
- return reinterpret_cast<ComparableFunction*>(-1);
- }
- static unsigned getHashValue(const ComparableFunction *CF) {
- return CF->Hash;
- }
- static bool isEqual(const ComparableFunction *LHS,
- const ComparableFunction *RHS) {
- if (LHS == RHS)
- return true;
- if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
- RHS == getEmptyKey() || RHS == getTombstoneKey())
- return false;
- assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
- return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
- }
-};
+ // Never thunk a strong function to a weak function.
+ assert(!OldF.getFunc()->mayBeOverridden() ||
+ NewF.getFunc()->mayBeOverridden());
-bool MergeFunctions::runOnModule(Module &M) {
- typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
+ << NewF.getFunc()->getName() << '\n');
- bool Changed = false;
- TD = getAnalysisIfAvailable<TargetData>();
+ Function *DeleteF = NewF.getFunc();
+ NewF.release();
+ mergeTwoFunctions(OldF.getFunc(), DeleteF);
+ return true;
+}
- std::vector<Function *> Funcs;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- Funcs.push_back(F);
+// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
+// so that we'll look at it in the next round.
+void MergeFunctions::remove(Function *F) {
+ // We need to make sure we remove F, not a function "equal" to F per the
+ // function equality comparator.
+ //
+ // The special "lookup only" ComparableFunction bypasses the expensive
+ // function comparison in favour of a pointer comparison on the underlying
+ // Function*'s.
+ ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
+ if (FnSet.erase(CF)) {
+ DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ Deferred.push_back(F);
}
+}
- bool LocalChanged;
- do {
- LocalChanged = false;
-
- FnSetType FnSet;
- for (unsigned i = 0, e = Funcs.size(); i != e;) {
- Function *F = Funcs[i];
- ComparableFunction *NewF = new ComparableFunction(F, TD);
- std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
- if (!Result.second) {
- ComparableFunction *&OldF = *Result.first;
- assert(OldF && "Expected a hash collision");
-
- // NewF will be deleted in favour of OldF unless NewF is strong and
- // OldF is weak in which case swap them to keep the strong definition.
-
- if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
- std::swap(OldF, NewF);
-
- DEBUG(dbgs() << " " << OldF->Func->getName() << " == "
- << NewF->Func->getName() << '\n');
-
- Funcs.erase(Funcs.begin() + i);
- --e;
-
- Function *DeleteF = NewF->Func;
- delete NewF;
- MergeTwoFunctions(OldF->Func, DeleteF);
- LocalChanged = true;
- Changed = true;
- } else {
- ++i;
+// For each instruction used by the value, remove() the function that contains
+// the instruction. This should happen right before a call to RAUW.
+void MergeFunctions::removeUsers(Value *V) {
+ std::vector<Value *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+
+ for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ Use &U = UI.getUse();
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ remove(I->getParent()->getParent());
+ } else if (isa<GlobalValue>(U.getUser())) {
+ // do nothing
+ } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
+ CUI != CUE; ++CUI)
+ Worklist.push_back(*CUI);
}
}
- DeleteContainerPointers(FnSet);
- } while (LocalChanged);
-
- return Changed;
+ }
}
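
removeUsers() walks the users of a value with an explicit worklist, recording the functions that contain instruction users and looking through constants transitively. A self-contained sketch of that traversal over a toy user graph (all names are made up):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // users[v] lists the values that use v; "const*" nodes are looked through.
  std::map<std::string, std::vector<std::string>> users = {
      {"F",      {"const1", "call1"}},
      {"const1", {"call2", "call3"}},
  };
  std::vector<std::string> worklist = {"F"};
  std::set<std::string> reached;
  while (!worklist.empty()) {
    std::string v = worklist.back();
    worklist.pop_back();
    for (const std::string &u : users[v]) {
      if (u.rfind("const", 0) == 0)
        worklist.push_back(u);   // Look through constants transitively.
      else
        reached.insert(u);       // A direct instruction-level user.
    }
  }
  for (const std::string &u : reached)
    std::cout << u << '\n';      // call1, call2, call3
  return 0;
}
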
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 432f7c5..2afd029 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,9 @@ namespace {
struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(ID) {}
+ PartialInliner() : ModulePass(ID) {
+ initializePartialInlinerPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module& M);
@@ -41,7 +43,7 @@ namespace {
char PartialInliner::ID = 0;
INITIALIZE_PASS(PartialInliner, "partial-inliner",
- "Partial Inliner", false, false);
+ "Partial Inliner", false, false)
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
@@ -67,7 +69,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
return 0;
// Clone the function, so that we can hack away on it.
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
Function* duplicateFunction = CloneFunction(F, VMap,
/*ModuleLevelChanges=*/false);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp b/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
deleted file mode 100644
index 4a99a41..0000000
--- a/contrib/llvm/lib/Transforms/IPO/PartialSpecialization.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-//===-- PartialSpecialization.cpp - Specialize for common constants--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass finds function arguments that are often a common constant and
-// specializes a version of the called function for that constant.
-//
-// This pass simply does the cloning for functions it specializes. It depends
-// on IPSCCP and DAE to clean up the results.
-//
-// The initial heuristic favors constant arguments that are used in control
-// flow.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "partialspecialization"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/ADT/DenseSet.h"
-#include <map>
-using namespace llvm;
-
-STATISTIC(numSpecialized, "Number of specialized functions created");
-STATISTIC(numReplaced, "Number of callers replaced by specialization");
-
-// Maximum number of arguments markable interested
-static const int MaxInterests = 6;
-
-// Call must be used at least occasionally
-static const int CallsMin = 5;
-
-// Must have 10% of calls having the same constant to specialize on
-static const double ConstValPercent = .1;
-
-namespace {
- typedef SmallVector<int, MaxInterests> InterestingArgVector;
- class PartSpec : public ModulePass {
- void scanForInterest(Function&, InterestingArgVector&);
- int scanDistribution(Function&, int, std::map<Constant*, int>&);
- public :
- static char ID; // Pass identification, replacement for typeid
- PartSpec() : ModulePass(ID) {}
- bool runOnModule(Module &M);
- };
-}
-
-char PartSpec::ID = 0;
-INITIALIZE_PASS(PartSpec, "partialspecialization",
- "Partial Specialization", false, false);
-
-// Specialize F by replacing the arguments (keys) in replacements with the
-// constants (values). Replace all calls to F with those constants with
-// a call to the specialized function. Returns the specialized function
-static Function*
-SpecializeFunction(Function* F,
- ValueMap<const Value*, Value*>& replacements) {
- // arg numbers of deleted arguments
- DenseMap<unsigned, const Argument*> deleted;
- for (ValueMap<const Value*, Value*>::iterator
- repb = replacements.begin(), repe = replacements.end();
- repb != repe; ++repb) {
- Argument const *arg = cast<const Argument>(repb->first);
- deleted[arg->getArgNo()] = arg;
- }
-
- Function* NF = CloneFunction(F, replacements,
- /*ModuleLevelChanges=*/false);
- NF->setLinkage(GlobalValue::InternalLinkage);
- F->getParent()->getFunctionList().push_back(NF);
-
- for (Value::use_iterator ii = F->use_begin(), ee = F->use_end();
- ii != ee; ) {
- Value::use_iterator i = ii;
- ++ii;
- User *U = *i;
- CallSite CS(U);
- if (CS) {
- if (CS.getCalledFunction() == F) {
- SmallVector<Value*, 6> args;
- // Assemble the non-specialized arguments for the updated callsite.
- // In the process, make sure that the specialized arguments are
- // constant and match the specialization. If that's not the case,
- // this callsite needs to call the original or some other
- // specialization; don't change it here.
- CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
- for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
- DenseMap<unsigned, const Argument*>::iterator delit = deleted.find(
- std::distance(as, ai));
- if (delit == deleted.end())
- args.push_back(cast<Value>(ai));
- else {
- Constant *ci = dyn_cast<Constant>(ai);
- if (!(ci && ci == replacements[delit->second]))
- goto next_use;
- }
- }
- Value* NCall;
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- NCall = CallInst::Create(NF, args.begin(), args.end(),
- CI->getName(), CI);
- cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
- cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
- } else {
- InvokeInst *II = cast<InvokeInst>(U);
- NCall = InvokeInst::Create(NF, II->getNormalDest(),
- II->getUnwindDest(),
- args.begin(), args.end(),
- II->getName(), II);
- cast<InvokeInst>(NCall)->setCallingConv(II->getCallingConv());
- }
- CS.getInstruction()->replaceAllUsesWith(NCall);
- CS.getInstruction()->eraseFromParent();
- ++numReplaced;
- }
- }
- next_use:;
- }
- return NF;
-}
-
-
-bool PartSpec::runOnModule(Module &M) {
- bool Changed = false;
- for (Module::iterator I = M.begin(); I != M.end(); ++I) {
- Function &F = *I;
- if (F.isDeclaration() || F.mayBeOverridden()) continue;
- InterestingArgVector interestingArgs;
- scanForInterest(F, interestingArgs);
-
- // Find the first interesting Argument that we can specialize on
- // If there are multiple interesting Arguments, then those will be found
- // when processing the cloned function.
- bool breakOuter = false;
- for (unsigned int x = 0; !breakOuter && x < interestingArgs.size(); ++x) {
- std::map<Constant*, int> distribution;
- int total = scanDistribution(F, interestingArgs[x], distribution);
- if (total > CallsMin)
- for (std::map<Constant*, int>::iterator ii = distribution.begin(),
- ee = distribution.end(); ii != ee; ++ii)
- if (total > ii->second && ii->first &&
- ii->second > total * ConstValPercent) {
- ValueMap<const Value*, Value*> m;
- Function::arg_iterator arg = F.arg_begin();
- for (int y = 0; y < interestingArgs[x]; ++y)
- ++arg;
- m[&*arg] = ii->first;
- SpecializeFunction(&F, m);
- ++numSpecialized;
- breakOuter = true;
- Changed = true;
- }
- }
- }
- return Changed;
-}
-
-/// scanForInterest - This function decides which arguments would be worth
-/// specializing on.
-void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
- for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
- ii != ee; ++ii) {
- for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
- ui != ue; ++ui) {
-
- bool interesting = false;
- User *U = *ui;
- if (isa<CmpInst>(U)) interesting = true;
- else if (isa<CallInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(U))
- interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(U)) interesting = true;
- else if (isa<BranchInst>(U)) interesting = true;
-
- if (interesting) {
- args.push_back(std::distance(F.arg_begin(), ii));
- break;
- }
- }
- }
-}
-
-/// scanDistribution - Construct a histogram of constants for arg of F at arg.
-int PartSpec::scanDistribution(Function& F, int arg,
- std::map<Constant*, int>& dist) {
- bool hasIndirect = false;
- int total = 0;
- for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
- ii != ee; ++ii) {
- User *U = *ii;
- CallSite CS(U);
- if (CS && CS.getCalledFunction() == &F) {
- ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
- ++total;
- } else
- hasIndirect = true;
- }
-
- // Preserve the original address taken function even if all other uses
- // will be specialized.
- if (hasIndirect) ++total;
- return total;
-}
-
-ModulePass* llvm::createPartialSpecializationPass() { return new PartSpec(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 09ac76f..d91c2c4 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -37,7 +37,9 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized");
namespace {
struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- PruneEH() : CallGraphSCCPass(ID) {}
+ PruneEH() : CallGraphSCCPass(ID) {
+ initializePruneEHPass(*PassRegistry::getPassRegistry());
+ }
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +50,11 @@ namespace {
}
char PruneEH::ID = 0;
-INITIALIZE_PASS(PruneEH, "prune-eh",
- "Remove unused exception handling info", false, false);
+INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false)
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
diff --git a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index ee10ad0..b5f09ec 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,7 +29,9 @@ namespace {
class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(ID) { }
+ StripDeadPrototypesPass() : ModulePass(ID) {
+ initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
};
@@ -37,7 +39,7 @@ public:
char StripDeadPrototypesPass::ID = 0;
INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false);
+ "Strip Unused Function Prototypes", false, false)
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 20b7b8f..a690765 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -39,7 +39,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripSymbols(bool ODI = false)
- : ModulePass(ID), OnlyDebugInfo(ODI) {}
+ : ModulePass(ID), OnlyDebugInfo(ODI) {
+ initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -52,7 +54,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -65,7 +69,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -78,7 +84,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDeadDebugInfo()
- : ModulePass(ID) {}
+ : ModulePass(ID) {
+ initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnModule(Module &M);
@@ -90,7 +98,7 @@ namespace {
char StripSymbols::ID = 0;
INITIALIZE_PASS(StripSymbols, "strip",
- "Strip all symbols from a module", false, false);
+ "Strip all symbols from a module", false, false)
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
@@ -99,7 +107,7 @@ ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
char StripNonDebugSymbols::ID = 0;
INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
"Strip all symbols, except dbg symbols, from a module",
- false, false);
+ false, false)
ModulePass *llvm::createStripNonDebugSymbolsPass() {
return new StripNonDebugSymbols();
@@ -107,7 +115,7 @@ ModulePass *llvm::createStripNonDebugSymbolsPass() {
char StripDebugDeclare::ID = 0;
INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
- "Strip all llvm.dbg.declare intrinsics", false, false);
+ "Strip all llvm.dbg.declare intrinsics", false, false)
ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
@@ -115,7 +123,7 @@ ModulePass *llvm::createStripDebugDeclarePass() {
char StripDeadDebugInfo::ID = 0;
INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
- "Strip debug info for unused symbols", false, false);
+ "Strip debug info for unused symbols", false, false)
ModulePass *llvm::createStripDeadDebugInfoPass() {
return new StripDeadDebugInfo();
diff --git a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
index b82b03f..584deac 100644
--- a/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -50,7 +50,9 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(ID) {}
+ SRETPromotion() : CallGraphSCCPass(ID) {
+ initializeSRETPromotionPass(*PassRegistry::getPassRegistry());
+ }
private:
CallGraphNode *PromoteReturn(CallGraphNode *CGN);
@@ -61,8 +63,11 @@ namespace {
}
char SRETPromotion::ID = 0;
-INITIALIZE_PASS(SRETPromotion, "sretpromotion",
- "Promote sret arguments to multiple ret values", false, false);
+INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
+INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_END(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false)
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 6f9609c..9c2969c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -81,7 +81,9 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ initializeInstCombinerPass(*PassRegistry::getPassRegistry());
+ }
public:
virtual bool runOnFunction(Function &F);
@@ -143,6 +145,8 @@ public:
ConstantInt *RHS);
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
+ Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
+ ConstantInt *DivRHS);
Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
ICmpInst::Predicate Pred, Value *TheAdd);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
@@ -284,9 +288,16 @@ public:
private:
- /// SimplifyCommutative - This performs a few simplifications for
- /// commutative operators.
- bool SimplifyCommutative(BinaryOperator &I);
+ /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+ /// operators which are associative or commutative.
+ bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
+
+ /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+ /// which some other binary operation distributes over either by factorizing
+ /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+ /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
+ /// a win). Returns the simplified value, or null if it didn't simplify.
+ Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
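
SimplifyUsingDistributiveLaws relies on the two directions of distributivity named in the comment: factoring "(A*B)+(A*C)" into "A*(B+C)" and expanding "A & (B | C)" into "(A&B) | (A&C)". A small exhaustive check of both identities over a few values (just a sanity sketch; the pass itself works on IR, not integers):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t A = 0; A < 8; ++A)
    for (uint32_t B = 0; B < 8; ++B)
      for (uint32_t C = 0; C < 8; ++C) {
        // Factorization: (A*B) + (A*C) == A*(B+C)  (mod 2^32).
        assert(A * B + A * C == A * (B + C));
        // Expansion: A & (B | C) == (A & B) | (A & C).
        assert((A & (B | C)) == ((A & B) | (A & C)));
      }
  return 0;
}
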
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
@@ -310,10 +321,7 @@ private:
// into the PHI (which is only possible if all operands to the PHI are
// constants).
//
- // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
- // that would normally be unprofitable because they strongly encourage jump
- // threading.
- Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);
+ Instruction *FoldOpIntoPhi(Instruction &I);
// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
// operator and they all are only used by the PHI, PHI together their
@@ -339,10 +347,6 @@ private:
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
-
- unsigned GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign = 0);
-
};
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 4d2c89e..c36a955 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -84,43 +84,37 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), TD))
return ReplaceInstUsesWith(I, V);
-
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
- // X + (signbit) --> X ^ signbit
- const APInt& Val = CI->getValue();
- uint32_t BitWidth = Val.getBitWidth();
- if (Val == APInt::getSignBit(BitWidth))
- return BinaryOperator::CreateXor(LHS, RHS);
-
- // See if SimplifyDemandedBits can simplify this. This handles stuff like
- // (X & 254)+1 -> (X&254)|1
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- // zext(bool) + C -> bool ? C + 1 : C
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
- if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
- return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- }
+ // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- if (isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // X + (signbit) --> X ^ signbit
+ const APInt &Val = CI->getValue();
+ if (Val.isSignBit())
+ return BinaryOperator::CreateXor(LHS, RHS);
+
+ // See if SimplifyDemandedBits can simplify this. This handles stuff like
+ // (X & 254)+1 -> (X&254)|1
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ // zext(bool) + C -> bool ? C + 1 : C
+ if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
+ if (ZI->getSrcTy()->isIntegerTy(1))
+ return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- ConstantInt *XorRHS = 0;
- Value *XorLHS = 0;
- if (isa<ConstantInt>(RHSC) &&
- match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
+ Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
- const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
+ const APInt &RHSVal = CI->getValue();
unsigned ExtendAmt = 0;
// If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext.
// If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
@@ -130,13 +124,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
else if (XorRHS->getValue().isPowerOf2())
ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1;
}
-
+
if (ExtendAmt) {
APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt);
if (!MaskedValueIsZero(XorLHS, Mask))
ExtendAmt = 0;
}
-
+
if (ExtendAmt) {
Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
@@ -145,34 +139,28 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
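
The ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80) pattern recognized above is an idiom for sign extension: xor the narrow value with its sign bit, then add the matching negative constant. A quick numeric check of the 8-to-32 bit case (a sketch with hard-coded constants, assuming wrapping unsigned arithmetic):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x) {
    // xor the low byte with its sign bit, then add the matching negative
    // constant: together this behaves like an 8-to-32 bit sign extension.
    uint32_t viaXorAdd = ((x & 0xFFu) ^ 0x80u) + 0xFFFFFF80u;
    uint32_t viaSext = (x & 0x80u) ? (x | 0xFFFFFF00u) : x;
    assert(viaXorAdd == viaSext);
  }
  return 0;
}
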
+ if (isa<Constant>(RHS) && isa<PHINode>(LHS))
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+
if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
- if (I.getType()->isIntegerTy()) {
- // X + X --> X << 1
- if (LHS == RHS)
- return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
-
- if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
- if (RHSI->getOpcode() == Instruction::Sub)
- if (LHS == RHSI->getOperand(1)) // A + (B - A) --> B
- return ReplaceInstUsesWith(I, RHSI->getOperand(0));
- }
- if (Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
- if (LHSI->getOpcode() == Instruction::Sub)
- if (RHS == LHSI->getOperand(1)) // (B - A) + A --> B
- return ReplaceInstUsesWith(I, LHSI->getOperand(0));
- }
+ // X + X --> X << 1
+ if (LHS == RHS) {
+ BinaryOperator *New =
+ BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1));
+ New->setHasNoSignedWrap(I.hasNoSignedWrap());
+ New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ return New;
}
// -A + B --> B - A
// -A + -B --> -(A + B)
if (Value *LHSV = dyn_castNegVal(LHS)) {
- if (LHS->getType()->isIntOrIntVectorTy()) {
- if (Value *RHSV = dyn_castNegVal(RHS)) {
- Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
- return BinaryOperator::CreateNeg(NewAdd);
- }
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+ return BinaryOperator::CreateNeg(NewAdd);
}
return BinaryOperator::CreateSub(RHS, LHSV);
@@ -199,11 +187,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (dyn_castFoldableMul(RHS, C2) == LHS)
return BinaryOperator::CreateMul(LHS, AddOne(C2));
- // X + ~X --> -1 since ~X = -X-1
- if (match(LHS, m_Not(m_Specific(RHS))) ||
- match(RHS, m_Not(m_Specific(LHS))))
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
// A+B --> A|B iff A and B have no bits set in common.
if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
@@ -222,7 +205,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// W*X + Y*Z --> W * (X+Z) iff W == Y
- if (I.getType()->isIntOrIntVectorTy()) {
+ {
Value *W, *X, *Y, *Z;
if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
@@ -251,24 +234,22 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() &&
- match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
- if (Anded == CRHS) {
- // See if all bits from the first bit set in the Add RHS up are included
- // in the mask. First, get the rightmost bit.
- const APInt &AddRHSV = CRHS->getValue();
-
- // Form a mask of all bits from the lowest bit added through the top.
- APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
-
- // See if the and mask includes all of these bits.
- APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
-
- if (AddRHSHighBits == AddRHSHighBitsAnd) {
- // Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
- return BinaryOperator::CreateAnd(NewAdd, C2);
- }
+ match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
+ CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
+ // See if all bits from the first bit set in the Add RHS up are included
+ // in the mask. First, get the rightmost bit.
+ const APInt &AddRHSV = CRHS->getValue();
+
+ // Form a mask of all bits from the lowest bit added through the top.
+ APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
+
+ // See if the and mask includes all of these bits.
+ APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
+
+ if (AddRHSHighBits == AddRHSHighBitsAnd) {
+ // Okay, the xform is safe. Insert the new add pronto.
+ Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+ return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
@@ -293,12 +274,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Can we fold the add into the argument of the select?
// We check both true and false select arguments for a matching subtract.
- if (match(FV, m_Zero()) &&
- match(TV, m_Sub(m_Value(N), m_Specific(A))))
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the true select value.
return SelectInst::Create(SI->getCondition(), N, A);
- if (match(TV, m_Zero()) &&
- match(FV, m_Sub(m_Value(N), m_Specific(A))))
+
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A))))
// Fold the add into the false select value.
return SelectInst::Create(SI->getCondition(), A, N);
}
@@ -342,7 +322,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
@@ -424,6 +404,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
const Type *IntPtrTy = TD.getIntPtrType(GEP->getContext());
Value *Result = Constant::getNullValue(IntPtrTy);
+ // If the GEP is inbounds, we know that none of the addressing operations will
+ // overflow in an unsigned sense.
+ bool isInBounds = cast<GEPOperator>(GEP)->isInBounds();
+
// Build a mask for high order bits.
unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
@@ -439,16 +423,16 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- Result = Builder->CreateAdd(Result,
- ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".offs");
+ if (Size)
+ Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".offs");
continue;
}
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC =
ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale);
+ Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
@@ -457,9 +441,9 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
- Constant *Scale = ConstantInt::get(IntPtrTy, Size);
// We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, Scale, GEP->getName()+".idx");
+ Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
+ GEP->getName()+".idx", isInBounds /*NUW*/);
}
// Emit an add instruction.
@@ -545,8 +529,13 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Op0 == Op1) // sub X, X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)-(A*C) -> A*(B-C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
// If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW.
if (Value *V = dyn_castNegVal(Op1)) {
@@ -556,18 +545,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Res;
}
- if (isa<UndefValue>(Op0))
- return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1); // X - undef -> undef
if (I.getType()->isIntegerTy(1))
return BinaryOperator::CreateXor(Op0, Op1);
+
+ // Replace (-1 - A) with (~A).
+ if (match(Op0, m_AllOnes()))
+ return BinaryOperator::CreateNot(Op1);
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
- // Replace (-1 - A) with (~A).
- if (C->isAllOnesValue())
- return BinaryOperator::CreateNot(Op1);
-
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
@@ -576,29 +561,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (C->isZero()) {
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
- if (SI->getOpcode() == Instruction::LShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert AShr.
- return BinaryOperator::Create(Instruction::AShr,
- SI->getOperand(0), CU, SI->getName());
- }
- }
- } else if (SI->getOpcode() == Instruction::AShr) {
- if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- // Check to see if we are shifting out everything but the sign bit.
- if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
- SI->getType()->getPrimitiveSizeInBits()-1) {
- // Ok, the transformation is safe. Insert LShr.
- return BinaryOperator::CreateLShr(
- SI->getOperand(0), CU, SI->getName());
- }
- }
- }
- }
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
}
// Try to fold constant sub into select arguments.
@@ -608,86 +580,80 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// C - zext(bool) -> bool ? C - 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1))
- if (ZI->getSrcTy() == Type::getInt1Ty(I.getContext()))
+ if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);
+
+ // C-(X+C2) --> (C-C2)-X
+ ConstantInt *C2;
+ if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
}
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op1I->getOpcode() == Instruction::Add) {
- if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(1),
- I.getName());
- else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::CreateNeg(Op1I->getOperand(0),
- I.getName());
- else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
- if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
- // C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(
- ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));
- }
+
+ { Value *Y;
+ // X-(X+Y) == -Y X-(Y+X) == -Y
+ if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
+ match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ return BinaryOperator::CreateNeg(Y);
+
+ // (X-Y)-X == -Y
+ if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
+ return BinaryOperator::CreateNeg(Y);
+ }
+
+ if (Op1->hasOneUse()) {
+ Value *X = 0, *Y = 0, *Z = 0;
+ Constant *C = 0;
+ ConstantInt *CI = 0;
+
+ // (X - (Y - Z)) --> (X + (Z - Y)).
+ if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
+ return BinaryOperator::CreateAdd(Op0,
+ Builder->CreateSub(Z, Y, Op1->getName()));
+
+ // (X - (X & Y)) --> (X & ~Y)
+ //
+ if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
+ match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ return BinaryOperator::CreateAnd(Op0,
+ Builder->CreateNot(Y, Y->getName() + ".not"));
+
+ // 0 - (X sdiv C) -> (X sdiv -C)
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
+ match(Op0, m_Zero()))
+ return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+
+ // 0 - (X << Y) -> (-X << Y) when X is freely negatable.
+ if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
+ if (Value *XNeg = dyn_castNegVal(X))
+ return BinaryOperator::CreateShl(XNeg, Y);
+
+ // X - X*C --> X * (1-C)
+ if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
+ return BinaryOperator::CreateMul(Op0, CP1);
}
- if (Op1I->hasOneUse()) {
- // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression
- // is not used by anyone else...
- //
- if (Op1I->getOpcode() == Instruction::Sub) {
- // Swap the two operands of the subexpr...
- Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1);
- Op1I->setOperand(0, IIOp1);
- Op1I->setOperand(1, IIOp0);
-
- // Create the new top level add instruction...
- return BinaryOperator::CreateAdd(Op0, Op1);
- }
-
- // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
- //
- if (Op1I->getOpcode() == Instruction::And &&
- (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {
- Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
-
- Value *NewNot = Builder->CreateNot(OtherOp, "B.not");
- return BinaryOperator::CreateAnd(Op0, NewNot);
- }
-
- // 0 - (X sdiv C) -> (X sdiv -C)
- if (Op1I->getOpcode() == Instruction::SDiv)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isZero())
- if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
- return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
- ConstantExpr::getNeg(DivRHS));
-
- // 0 - (C << X) -> (-C << X)
- if (Op1I->getOpcode() == Instruction::Shl)
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
- if (CSI->isZero())
- if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0)))
- return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1));
-
- // X - X*C --> X * (1-C)
- ConstantInt *C2 = 0;
- if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 =
- ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
- C2);
- return BinaryOperator::CreateMul(Op0, CP1);
- }
+ // X - X<<C --> X * (1-(1<<C))
+ if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ Constant *One = ConstantInt::get(I.getType(), 1);
+ C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
+ return BinaryOperator::CreateMul(Op0, C);
}
- }
-
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (Op0I->getOpcode() == Instruction::Add) {
- if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(1));
- else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X
- return ReplaceInstUsesWith(I, Op0I->getOperand(0));
- } else if (Op0I->getOpcode() == Instruction::Sub) {
- if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::CreateNeg(Op0I->getOperand(1),
- I.getName());
+
+ // X - A*-B -> X + A*B
+ // X - -A*B -> X + A*B
+ Value *A, *B;
+ if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
+ match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+
+ // X - A*CI -> X + A*-CI
+ // X - CI*A -> X + A*-CI
+ if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
+ match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+ return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
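
Several of the subtraction folds above are plain ring identities that hold in wrapping 32-bit arithmetic, e.g. X-(X+Y) == -Y, X - X*C == X*(1-C), and X - (X<<C) == X*(1-(1<<C)). A small brute-force check over a range of values (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X)
    for (uint32_t Y = 0; Y < 64; ++Y) {
      assert(X - (X + Y) == 0u - Y);                 // X-(X+Y) == -Y
      assert(X - X * Y == X * (1u - Y));             // X - X*C --> X * (1-C)
      uint32_t C = Y % 32;
      assert(X - (X << C) == X * (1u - (1u << C)));  // X - X<<C --> X*(1-(1<<C))
    }
  return 0;
}
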
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 19a05bf..b6b6b84 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -172,7 +172,9 @@ static Value *getFCmpValue(bool isordered, unsigned code,
case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break;
case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
- case 7: return ConstantInt::getTrue(LHS->getContext());
+ case 7:
+ if (!isordered) return ConstantInt::getTrue(LHS->getContext());
+ Pred = FCmpInst::FCMP_ORD; break;
}
return Builder->CreateFCmp(Pred, LHS, RHS);
}
@@ -207,15 +209,26 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
}
break;
case Instruction::Or:
- if (Together == AndRHS) // (X | C) & C --> C
- return ReplaceInstUsesWith(TheAnd, AndRHS);
-
- if (Op->hasOneUse() && Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
+ if (Op->hasOneUse()){
+ if (Together != OpRHS) {
+ // (X | C1) & C2 --> (X | (C1&C2)) & C2
+ Value *Or = Builder->CreateOr(X, Together);
+ Or->takeName(Op);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
+ }
+
+ ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
+ if (TogetherCI && !TogetherCI->isZero()){
+ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
+ // NOTE: This reduces the number of bits set in the & mask, which
+ // can expose opportunities for store narrowing.
+ Together = ConstantExpr::getXor(AndRHS, Together);
+ Value *And = Builder->CreateAnd(X, Together);
+ And->takeName(Op);
+ return BinaryOperator::CreateOr(And, OpRHS);
+ }
}
+
break;
case Instruction::Add:
if (Op->hasOneUse()) {
@@ -261,10 +274,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *CI = ConstantInt::get(AndRHS->getContext(),
AndRHS->getValue() & ShlMask);
- if (CI->getValue() == ShlMask) {
- // Masking out bits that the shift already masks
+ if (CI->getValue() == ShlMask)
+ // Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op); // No need for the and.
- } else if (CI != AndRHS) { // Reducing bits set in and.
+
+ if (CI != AndRHS) { // Reducing bits set in and.
TheAnd.setOperand(1, CI);
return &TheAnd;
}
@@ -281,10 +295,11 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *CI = ConstantInt::get(Op->getContext(),
AndRHS->getValue() & ShrMask);
- if (CI->getValue() == ShrMask) {
- // Masking out bits that the shift already masks.
+ if (CI->getValue() == ShrMask)
+ // Masking out bits that the shift already masks.
return ReplaceInstUsesWith(TheAnd, Op);
- } else if (CI != AndRHS) {
+
+ if (CI != AndRHS) {
TheAnd.setOperand(1, CI); // Reduce bits set in and cst.
return &TheAnd;
}
@@ -434,6 +449,270 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
}
+/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
+/// One of A and B is considered the mask, the other the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum
+/// contains only "Mask", then both A and B can be considered masks.
+/// If A is the mask, then it has been proven that (A & C) == C. This
+/// is trivial if C == A or C == 0. If both A and C are constants, the
+/// proof is also easy.
+/// For the following explanations we assume that A is the mask.
+/// The "AllOnes" part means that the comparison is true only
+/// if (A & B) == A, i.e. all bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
+/// The "AllZeroes" part means that the comparison is true only
+/// if (A & B) == 0, i.e. all bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
+/// The "Mixed" part means that (A & B) == C, where C may contain any
+/// combination of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
+/// The "Not" part means that in the descriptions above "==" should be
+/// replaced by "!=".
+/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
+/// If the mask A contains a single bit, then the following is equivalent:
+/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
+/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
+enum MaskedICmpType {
+ FoldMskICmp_AMask_AllOnes = 1,
+ FoldMskICmp_AMask_NotAllOnes = 2,
+ FoldMskICmp_BMask_AllOnes = 4,
+ FoldMskICmp_BMask_NotAllOnes = 8,
+ FoldMskICmp_Mask_AllZeroes = 16,
+ FoldMskICmp_Mask_NotAllZeroes = 32,
+ FoldMskICmp_AMask_Mixed = 64,
+ FoldMskICmp_AMask_NotMixed = 128,
+ FoldMskICmp_BMask_Mixed = 256,
+ FoldMskICmp_BMask_NotMixed = 512
+};
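
For a single-bit mask the comment above states that (icmp eq (A & B), A) and (icmp ne (A & B), 0) coincide, and likewise for their negations. A short exhaustive check of that equivalence (a sketch over plain integers, not IR):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 0x4;                 // Single-bit mask.
  for (uint32_t B = 0; B < 64; ++B) {
    // With a single-bit mask, "all bits of A set in B" and "not all bits of
    // A cleared in B" are the same condition, as the comment above states.
    assert(((A & B) == A) == ((A & B) != 0));
    assert(((A & B) != A) == ((A & B) == 0));
  }
  return 0;
}
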
+
+/// return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies
+static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
+ ICmpInst::Predicate SCC)
+{
+ ConstantInt *ACst = dyn_cast<ConstantInt>(A);
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
+ bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ ACst->getValue().isPowerOf2());
+ bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ BCst->getValue().isPowerOf2());
+ unsigned result = 0;
+ if (CCst != 0 && CCst->isZero()) {
+ // if C is zero, then both A and B qualify as mask
+ result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed));
+ return result;
+ }
+ if (A == C) {
+ result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
+ FoldMskICmp_AMask_Mixed)
+ : (FoldMskICmp_AMask_NotAllOnes |
+ FoldMskICmp_AMask_NotMixed));
+ if (icmp_abit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_AMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_AMask_Mixed));
+ }
+ else if (ACst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
+ : FoldMskICmp_AMask_NotMixed);
+ }
+ if (B == C)
+ {
+ result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_BMask_Mixed)
+ : (FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_BMask_NotMixed));
+ if (icmp_bbit)
+ result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
+ FoldMskICmp_BMask_NotMixed)
+ : (FoldMskICmp_Mask_AllZeroes |
+ FoldMskICmp_BMask_Mixed));
+ }
+ else if (BCst != 0 && CCst != 0 &&
+ ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
+ : FoldMskICmp_BMask_NotMixed);
+ }
+ return result;
+}
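To make the returned bit-set concrete: with the enum values above, a comparison of the form (icmp eq (A & B), A) where A is not a constant takes the A == C branch and yields FoldMskICmp_AMask_AllOnes | FoldMskICmp_AMask_Mixed = 1 + 64 = 65, while (icmp eq (A & B), 0) yields FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed | FoldMskICmp_BMask_Mixed = 16 + 64 + 256 = 336 before any single-bit-constant refinements. These numbers are just worked instances of the code above, not additional behaviour.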
+
+/// foldLogOpOfMaskedICmpsHelper:
+/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy
+static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
+ Value*& B, Value*& C,
+ Value*& D, Value*& E,
+ ICmpInst *LHS, ICmpInst *RHS) {
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
+ if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+ // vectors are not (yet?) supported
+ if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+
+ // Here comes the tricky part:
+ // LHS might be of the form L11 & L12 == X, X == L21 & L22,
+ // and L11 & L12 == L21 & L22. The same goes for RHS.
+ // Now we must find those components L** and R** that are equal, so
+ // that we can extract the parameters A, B, C, D, and E for the canonical
+ // pattern above.
+ Value *L1 = LHS->getOperand(0);
+ Value *L2 = LHS->getOperand(1);
+ Value *L11,*L12,*L21,*L22;
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ }
+ else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
+ L21 = L22 = 0;
+ }
+
+ Value *R1 = RHS->getOperand(0);
+ Value *R2 = RHS->getOperand(1);
+ Value *R11,*R12;
+ bool ok = false;
+ if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R2; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R2; ok = true;
+ }
+ }
+ if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
+ A = R11; D = R12; E = R1; ok = true;
+ }
+ else
+ if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ A = R12; D = R11; E = R1; ok = true;
+ }
+ else
+ return 0;
+ }
+ if (!ok)
+ return 0;
+
+ if (L11 == A) {
+ B = L12; C = L2;
+ }
+ else if (L12 == A) {
+ B = L11; C = L2;
+ }
+ else if (L21 == A) {
+ B = L22; C = L1;
+ }
+ else if (L22 == A) {
+ B = L21; C = L1;
+ }
+
+ unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
+ unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ return left_type & right_type;
+}
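A purely illustrative instance of the matching above: for LHS = (icmp eq (X & 0x0F), 0) and RHS = (icmp eq (X & 0xF0), 0), the helper binds A = X, B = 0x0F, C = 0, D = 0xF0 and E = 0; both sides then classify as Mask_AllZeroes (among others), so the returned intersection has that bit set and the caller below can merge the two masks.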
+/// foldLogOpOfMaskedICmps:
+/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y)
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate NEWCC,
+ llvm::InstCombiner::BuilderTy* Builder) {
+ Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ if (mask == 0) return 0;
+
+ if (NEWCC == ICmpInst::ICMP_NE)
+ mask >>= 1; // treat "Not"-states as normal states
+
+ if (mask & FoldMskICmp_Mask_AllZeroes) {
+ // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
+ // -> (icmp eq (A & (B|D)), 0)
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ // we can't use C as zero here, because we might actually be handling
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // with B and D each having a single bit set
+ Value* zero = Constant::getNullValue(A->getType());
+ return Builder->CreateICmp(NEWCC, newAnd, zero);
+ }
+ else if (mask & FoldMskICmp_BMask_AllOnes) {
+ // (icmp eq (A & B), B) & (icmp eq (A & D), D)
+ // -> (icmp eq (A & (B|D)), (B|D))
+ Value* newOr = Builder->CreateOr(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newOr);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr);
+ }
+ else if (mask & FoldMskICmp_AMask_AllOnes) {
+ // (icmp eq (A & B), A) & (icmp eq (A & D), A)
+ // -> (icmp eq (A & (B&D)), A)
+ Value* newAnd1 = Builder->CreateAnd(B, D);
+ Value* newAnd = Builder->CreateAnd(A, newAnd1);
+ return Builder->CreateICmp(NEWCC, newAnd, A);
+ }
+ else if (mask & FoldMskICmp_BMask_Mixed) {
+ // (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ // We already know that B & C == C && D & E == E.
+ // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
+ // C and E, which are shared by both the mask B and the mask D, don't
+ // contradict, then we can transform to
+ // -> (icmp eq (A & (B|D)), (C|E))
+ // Currently, we only handle the case of B, C, D, and E being constant.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+ // we can't simply use C and E here, because we might actually be handling
+ // (icmp ne (A & B), B) & (icmp eq (A & D), D)
+ // with B and D each having a single bit set
+
+ ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ if (CCst == 0) return 0;
+ if (LHS->getPredicate() != NEWCC)
+ CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
+ ConstantInt *ECst = dyn_cast<ConstantInt>(E);
+ if (ECst == 0) return 0;
+ if (RHS->getPredicate() != NEWCC)
+ ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
+ ConstantInt* MCst = dyn_cast<ConstantInt>(
+ ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
+ ConstantExpr::getXor(CCst, ECst)) );
+ // if there is a conflict, we should actually return false for the
+ // whole construct
+ if (!MCst->isZero())
+ return 0;
+ Value *newOr1 = Builder->CreateOr(B, D);
+ Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
+ Value *newAnd = Builder->CreateAnd(A, newOr1);
+ return Builder->CreateICmp(NEWCC, newAnd, newOr2);
+ }
+ return 0;
+}
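The Mask_AllZeroes arm above is the source-level identity ((x & B) == 0 && (x & D) == 0) == ((x & (B | D)) == 0). A small stand-alone check of that identity, with arbitrary masks and not taken from the patch:

  #include <cassert>
  #include <cstdint>
  int main() {
    const uint32_t B = 0x05, D = 0x30;        // example masks
    for (uint32_t x = 0; x < 256; ++x) {
      bool separate = ((x & B) == 0) && ((x & D) == 0);
      bool folded   = ((x & (B | D)) == 0);   // what the merged icmp tests
      assert(separate == folded);
    }
    return 0;
  }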
+
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -451,6 +730,10 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
+
+ // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
@@ -472,22 +755,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
- // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
- if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *Op1 = 0, *Op2 = 0;
- ConstantInt *CI1 = 0, *CI2 = 0;
- if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
- match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
- if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
- CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
- Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
- return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
- }
- }
- }
}
// From here on, we only handle:
@@ -712,12 +979,16 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyAndInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -725,7 +996,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
- APInt NotAndRHS(~AndRHSMask);
// Optimize a variety of ((val OP C1) & C2) combinations...
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
@@ -734,10 +1004,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
switch (Op0I->getOpcode()) {
default: break;
case Instruction::Xor:
- case Instruction::Or:
+ case Instruction::Or: {
// If the mask is only needed on one incoming arm, push it up.
if (!Op0I->hasOneUse()) break;
+ APInt NotAndRHS(~AndRHSMask);
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS,
@@ -753,6 +1024,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
break;
+ }
case Instruction::Add:
// ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
@@ -772,14 +1044,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
// has 1's for all bits that the subtraction with A might affect.
- if (Op0I->hasOneUse()) {
+ if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
uint32_t BitWidth = AndRHSMask.getBitWidth();
uint32_t Zeros = AndRHSMask.countLeadingZeros();
APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
- ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);
- if (!(A && A->isZero()) && // avoid infinite recursion.
- MaskedValueIsZero(Op0LHS, Mask)) {
+ if (MaskedValueIsZero(Op0LHS, Mask)) {
Value *NewNeg = Builder->CreateNeg(Op0RHS);
return BinaryOperator::CreateAnd(NewNeg, AndRHS);
}
@@ -797,39 +1067,25 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
break;
}
-
+
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
return Res;
- } else if (CastInst *CI = dyn_cast<CastInst>(Op0)) {
- // If this is an integer truncation or change from signed-to-unsigned, and
- // if the source is an and/or with immediate, transform it. This
- // frequently occurs for bitfield accesses.
- if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
- if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
- CastOp->getNumOperands() == 2)
- if (ConstantInt *AndCI =dyn_cast<ConstantInt>(CastOp->getOperand(1))){
- if (CastOp->getOpcode() == Instruction::And) {
- // Change: and (cast (and X, C1) to T), C2
- // into : and (cast X to T), trunc_or_bitcast(C1)&C2
- // This will fold the two constants together, which may allow
- // other simplifications.
- Value *NewCast = Builder->CreateTruncOrBitCast(
- CastOp->getOperand(0), I.getType(),
- CastOp->getName()+".shrunk");
- // trunc_or_bitcast(C1)&C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::CreateAnd(NewCast, C3);
- } else if (CastOp->getOpcode() == Instruction::Or) {
- // Change: and (cast (or X, C1) to T), C2
- // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
- Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
- if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)
- // trunc(C1)&C2
- return ReplaceInstUsesWith(I, AndRHS);
- }
- }
+ }
+
+ // If this is an integer truncation, and if the source is an 'and' with
+ // immediate, transform it. This frequently occurs for bitfield accesses.
+ {
+ Value *X = 0; ConstantInt *YC = 0;
+ if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
+ // Change: and (trunc (and X, YC) to T), C2
+ // into : and (trunc X to T), trunc(YC) & C2
+ // This will fold the two constants together, which may allow
+ // other simplifications.
+ Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk");
+ Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
+ C3 = ConstantExpr::getAnd(C3, AndRHS);
+ return BinaryOperator::CreateAnd(NewCast, C3);
}
}
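The rewrite above relies on truncation distributing over bitwise and, i.e. (T)(x & YC) & C2 == (T)x & ((T)YC & C2). A stand-alone check with example constants (YC and C2 are arbitrary here, not values from the patch):

  #include <cassert>
  #include <cstdint>
  int main() {
    const uint32_t YC = 0x3F0;   // inner 'and' immediate
    const uint8_t  C2 = 0xCC;    // outer 'and' immediate
    for (uint32_t x = 0; x < 0x10000; ++x) {
      uint8_t before = (uint8_t)(x & YC) & C2;           // and (trunc (and x, YC)), C2
      uint8_t after  = (uint8_t)x & ((uint8_t)YC & C2);  // and (trunc x), trunc(YC) & C2
      assert(before == after);
    }
    return 0;
  }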
@@ -851,7 +1107,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
I.getName()+".demorgan");
return BinaryOperator::CreateNot(Or);
}
-
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
// (A|B) & ~(A&B) -> A^B
@@ -884,7 +1140,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
cast<BinaryOperator>(Op1)->swapOperands();
std::swap(A, B);
}
- if (A == Op0) // A&(A^B) -> A & ~B
+ // Notice that the pattern (A&(~B)) is actually (A&(-1^B)), so if
+ // A is originally -1 (or a vector of -1 and undefs), then we enter
+ // an endless loop. By checking that A is non-constant we ensure that
+ // we will never get to the loop.
+ if (A == Op0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));
}
@@ -1160,7 +1420,12 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return getICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
-
+
+ // handle (roughly):
+ // (icmp ne (A & B), C) | (icmp ne (A & D), E)
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1173,24 +1438,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
- // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
- if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
- Value *Op1 = 0, *Op2 = 0;
- ConstantInt *CI1 = 0, *CI2 = 0;
- if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
- match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
- if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
- CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
- Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
- return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
- }
- }
- }
}
-
+
+ // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
+ // iff C2 + CA == C1.
+ if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddCst;
+ if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
+ if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
+ return Builder->CreateICmpULE(Val, LHSCst);
+ }
+
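A quick exhaustive sanity check of the new ult/eq fold with concrete constants CA = 1, C1 = 4, C2 = 3 (so C2 + CA == C1); this is only a sketch mirroring the comment above, not code from the patch:

  #include <cassert>
  #include <cstdint>
  int main() {
    const uint8_t CA = 1, C1 = 4, C2 = 3;     // C2 + CA == C1
    for (unsigned i = 0; i < 256; ++i) {
      uint8_t X   = (uint8_t)i;
      uint8_t Sum = (uint8_t)(X + CA);        // wraps like the i8 add
      bool before = (Sum < C1) || (X == C2);  // icmp ult | icmp eq
      bool after  = (Sum <= C1);              // icmp ule
      assert(before == after);
    }
    return 0;
  }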
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
@@ -1429,12 +1687,16 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
}
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyOrInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -1481,8 +1743,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
match(Op1, m_Or(m_Value(), m_Value())) ||
- (match(Op0, m_Shift(m_Value(), m_Value())) &&
- match(Op1, m_Shift(m_Value(), m_Value())))) {
+ (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+ match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
}
@@ -1509,7 +1771,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *C = 0, *D = 0;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- Value *V1 = 0, *V2 = 0, *V3 = 0;
+ Value *V1 = 0, *V2 = 0;
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
@@ -1567,25 +1829,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
}
-
- // Check to see if we have any common things being and'ed. If so, find the
- // terms for V1 & (V2|V3).
- if (Op0->hasOneUse() || Op1->hasOneUse()) {
- V1 = 0;
- if (A == B) // (A & C)|(A & D) == A & (C|D)
- V1 = A, V2 = C, V3 = D;
- else if (A == D) // (A & C)|(B & A) == A & (B|C)
- V1 = A, V2 = B, V3 = C;
- else if (C == B) // (A & C)|(C & D) == C & (A|D)
- V1 = C, V2 = A, V3 = D;
- else if (C == D) // (A & C)|(B & C) == C & (A|B)
- V1 = C, V2 = A, V3 = B;
-
- if (V1) {
- Value *Or = Builder->CreateOr(V2, V3, "tmp");
- return BinaryOperator::CreateAnd(V1, Or);
- }
- }
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
// Don't do this for vector select idioms, the code generator doesn't handle
@@ -1667,65 +1910,69 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// fold (or (cast A), (cast B)) -> (cast (or A, B))
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
- if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
- if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() &&
- SrcTy->isIntOrIntVectorTy()) {
- Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
-
- if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
- // Only do this if the casts both really cause code to be
- // generated.
- ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
- ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
- Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
- return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
- }
-
- // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
- if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
- if (Value *Res = FoldOrOfICmps(LHS, RHS))
- return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
-
- // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
- // cast is otherwise not optimizable. This happens for vector sexts.
- if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
- if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
- if (Value *Res = FoldOrOfFCmps(LHS, RHS))
- return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+ CastInst *Op1C = dyn_cast<CastInst>(Op1);
+ if (Op1C && Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() &&
+ SrcTy->isIntOrIntVectorTy()) {
+ Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+
+ if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
+ ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) {
+ Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
+
+ // If this is or(cast(icmp), cast(icmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp))
+ if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfICmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
+
+ // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the
+ // cast is otherwise not optimizable. This happens for vector sexts.
+ if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp))
+ if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp))
+ if (Value *Res = FoldOrOfFCmps(LHS, RHS))
+ return CastInst::Create(Op0C->getOpcode(), Res, I.getType());
}
+ }
+ }
+
+ // Note: If we've gotten to the point of visiting the outer OR, then the
+ // inner one couldn't be simplified. If it was a constant, then it won't
+ // be simplified by a later pass either, so we try swapping the inner/outer
+ // ORs in the hopes that we'll be able to simplify it this way.
+ // (X|C) | V --> (X|V) | C
+ if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
+ match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
+ Value *Inner = Builder->CreateOr(A, Op1);
+ Inner->takeName(Op0);
+ return BinaryOperator::CreateOr(Inner, C1);
}
return Changed ? &I : 0;
}
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) {
- if (isa<UndefValue>(Op0))
- // Handle undef ^ undef -> 0 special case. This is a common
- // idiom (misuse).
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
- }
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- // xor X, X = 0
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (I.getType()->isVectorTy())
- if (isa<ConstantAggregateZero>(Op1))
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -1844,15 +2091,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return NV;
}
- if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1
- if (X == Op1)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1
- if (X == Op0)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
-
BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
if (Op1I) {
Value *A, *B;
@@ -1865,10 +2103,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
- } else if (match(Op1I, m_Xor(m_Specific(Op0), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // A^(A^B) == B
- } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {
- return ReplaceInstUsesWith(I, A); // A^(B^A) == B
} else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
Op1I->hasOneUse()){
if (A == Op0) { // A^(A&B) -> A^(B&A)
@@ -1891,10 +2125,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));
- } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {
- return ReplaceInstUsesWith(I, B); // (A^B)^A == B
- } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {
- return ReplaceInstUsesWith(I, A); // (B^A)^A == B
} else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
Op0I->hasOneUse()){
if (A == Op1) // (A&B)^A -> (B&A)^A
@@ -1932,29 +2162,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
-
- // (A & B)^(C & D)
- if ((Op0I->hasOneUse() || Op1I->hasOneUse()) &&
- match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Value(C), m_Value(D)))) {
- // (X & Y)^(X & Y) -> (Y^Z) & X
- Value *X = 0, *Y = 0, *Z = 0;
- if (A == C)
- X = A, Y = B, Z = D;
- else if (A == D)
- X = A, Y = B, Z = C;
- else if (B == C)
- X = B, Y = A, Z = D;
- else if (B == D)
- X = B, Y = A, Z = C;
-
- if (X) {
- Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());
- return BinaryOperator::CreateAnd(NewOp, X);
- }
- }
}
-
+
// (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0ebe3b4..8449f7b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// getPromotedType - Return the specified type promoted as it would be to pass
@@ -29,100 +30,10 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
-/// EnforceKnownAlignment - If the specified pointer points to an object that
-/// we control, modify the object's alignment to PrefAlign. This isn't
-/// often possible though. If alignment is important, a more reliable approach
-/// is to simply align all global variables and allocation instructions to
-/// their preferred alignment from the beginning.
-///
-static unsigned EnforceKnownAlignment(Value *V,
- unsigned Align, unsigned PrefAlign) {
-
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- return Align;
- }
- case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
- // If there is a requested alignment and if this is an alloca, round up.
- if (AI->getAlignment() >= PrefAlign)
- return AI->getAlignment();
- AI->setAlignment(PrefAlign);
- return PrefAlign;
- }
- }
-
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // If there is a large requested alignment and we can, bump up the alignment
- // of the global.
- if (GV->isDeclaration()) return Align;
-
- if (GV->getAlignment() >= PrefAlign)
- return GV->getAlignment();
- // We can only increase the alignment of the global if it has no alignment
- // specified or if it is not assigned a section. If it is assigned a
- // section, the global could be densely packed with other objects in the
- // section, increasing the alignment could cause padding issues.
- if (!GV->hasSection() || GV->getAlignment() == 0)
- GV->setAlignment(PrefAlign);
- return GV->getAlignment();
- }
-
- return Align;
-}
-
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
- unsigned PrefAlign) {
- assert(V->getType()->isPointerTy() &&
- "GetOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
- unsigned TrailZ = KnownZero.countTrailingOnes();
-
- // Avoid trouble with rediculously large TrailZ values, such as
- // those computed from a null pointer.
- TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
-
- unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
-
- // LLVM doesn't support alignments larger than this currently.
- Align = std::min(Align, +Value::MaximumAlignment);
-
- if (PrefAlign > Align)
- Align = EnforceKnownAlignment(V, Align, PrefAlign);
-
- // We don't need to make any adjustment.
- return Align;
-}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -211,7 +122,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -234,7 +145,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
const Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
- Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));
+ unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
+ Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
+ Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
@@ -280,7 +193,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
- if (NumBytes->isNullValue()) return EraseInstFromFunction(CI);
+ if (NumBytes->isNullValue())
+ return EraseInstFromFunction(CI);
if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
if (CI->getZExtValue() == 1) {
@@ -289,6 +203,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// alignment is sufficient.
}
}
+
+ // No other transformations apply to volatile transfers.
+ if (MI->isVolatile())
+ return 0;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
@@ -332,82 +250,73 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (!TD) break;
const Type *ReturnTy = CI.getType();
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
// Get to the real allocated thing and offset as fast as possible.
Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
-
+
+ uint64_t Offset = 0;
+ uint64_t Size = -1ULL;
+
+ // Try to look through constant GEPs.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
+ if (!GEP->hasAllConstantIndices()) break;
+
+ // Get the current byte offset into the thing. Use the original
+ // operand in case we're looking through a bitcast.
+ SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ Offset = TD->getIndexedOffset(GEP->getPointerOperandType(),
+ Ops.data(), Ops.size());
+
+ Op1 = GEP->getPointerOperand()->stripPointerCasts();
+
+ // Make sure we're not a constant offset from an external
+ // global.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
+ if (!GV->hasDefinitiveInitializer()) break;
+ }
+
// If we've stripped down to a single global variable that we
// can know the size of then just return that.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
if (GV->hasDefinitiveInitializer()) {
Constant *C = GV->getInitializer();
- uint64_t GlobalSize = TD->getTypeAllocSize(C->getType());
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize));
+ Size = TD->getTypeAllocSize(C->getType());
} else {
// Can't determine size of the GV.
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
return ReplaceInstUsesWith(CI, RetVal);
}
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
// Get alloca size.
if (AI->getAllocatedType()->isSized()) {
- uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ Size = TD->getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
if (!C) break;
- AllocaSize *= C->getZExtValue();
+ Size *= C->getZExtValue();
}
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize));
}
} else if (CallInst *MI = extractMallocCall(Op1)) {
+ // Get allocation size.
const Type* MallocType = getMallocAllocatedType(MI);
- // Get alloca size.
- if (MallocType && MallocType->isSized()) {
- if (Value *NElems = getMallocArraySize(MI, TD, true)) {
+ if (MallocType && MallocType->isSized())
+ if (Value *NElems = getMallocArraySize(MI, TD, true))
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
- return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy,
- (NElements->getZExtValue() * TD->getTypeAllocSize(MallocType))));
- }
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) {
- // Only handle constant GEPs here.
- if (CE->getOpcode() != Instruction::GetElementPtr) break;
- GEPOperator *GEP = cast<GEPOperator>(CE);
-
- // Make sure we're not a constant offset from an external
- // global.
- Value *Operand = GEP->getPointerOperand();
- Operand = Operand->stripPointerCasts();
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand))
- if (!GV->hasDefinitiveInitializer()) break;
-
- // Get what we're pointing to and its size.
- const PointerType *BaseType =
- cast<PointerType>(Operand->getType());
- uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
-
- // Get the current byte offset into the thing. Use the original
- // operand in case we're looking through a bitcast.
- SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
- const PointerType *OffsetType =
- cast<PointerType>(GEP->getPointerOperand()->getType());
- uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
-
- if (Size < Offset) {
- // Out of bound reference? Negative index normalized to large
- // index? Just return "I don't know".
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
- return ReplaceInstUsesWith(CI, RetVal);
- }
-
- Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
- return ReplaceInstUsesWith(CI, RetVal);
- }
+ Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
+ }
// Do not return "I don't know" here. Later optimization passes could
// make it possible to evaluate objectsize to a constant.
- break;
+ if (Size == -1ULL)
+ break;
+
+ if (Size < Offset) {
+ // Out of bound reference? Negative index normalized to large
+ // index? Just return "I don't know".
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
+ }
+ return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
}
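Worked example (illustrative numbers only): for a global with a 10-byte initializer reached through a constant GEP 3 bytes into it, the code above ends with Size = 10 and Offset = 3, so the objectsize call folds to 7; with a 12-byte offset we would have Size < Offset and the call folds to the "don't know" value computed earlier (0 when the intrinsic's second argument is true, -1 otherwise).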
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
@@ -604,7 +513,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_loadu_dq:
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
@@ -613,7 +522,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -624,16 +533,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
const Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
return new StoreInst(II->getArgOperand(1), Ptr);
}
break;
-
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
@@ -646,7 +562,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
}
-
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
@@ -697,6 +613,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned AlignArg = II->getNumArgOperands() - 1;
+ ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
+ if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
+ II->setArgOperand(AlignArg,
+ ConstantInt::get(Type::getInt32Ty(II->getContext()),
+ MemAlign, false));
+ return II;
+ }
+ break;
+ }
+
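Put differently: if the pointer operand of one of these NEON load/store intrinsics can be proven more aligned than its explicit alignment argument claims (for example, a 16-byte-aligned alloca passed to a vld1 whose alignment argument is 1), the argument is raised to the proven value so the backend can pick the better-aligned instruction form. The specific numbers here are only an illustration.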
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -783,6 +725,8 @@ protected:
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+ return true;
if (ConstantInt *SizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
if (SizeCI->isAllOnesValue())
@@ -819,11 +763,11 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
Instruction *InstCombiner::visitCallSite(CallSite CS) {
bool Changed = false;
- // If the callee is a constexpr cast of a function, attempt to move the cast
- // to the arguments of the call/invoke.
- if (transformConstExprCastCall(CS)) return 0;
-
+ // If the callee is a pointer to a function, attempt to move any casts to the
+ // arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ return 0;
if (Function *CalleeF = dyn_cast<Function>(Callee))
// If the call and callee calling conventions don't match, this call must
@@ -917,12 +861,10 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- if (!isa<ConstantExpr>(CS.getCalledValue())) return false;
- ConstantExpr *CE = cast<ConstantExpr>(CS.getCalledValue());
- if (CE->getOpcode() != Instruction::BitCast ||
- !isa<Function>(CE->getOperand(0)))
+ Function *Callee =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (Callee == 0)
return false;
- Function *Callee = cast<Function>(CE->getOperand(0));
Instruction *Caller = CS.getInstruction();
const AttrListPtr &CallerPAL = CS.getAttributes();
@@ -984,9 +926,22 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- if (CallerPAL.getParamAttributes(i + 1)
- & Attribute::typeIncompatible(ParamTy))
+ unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ if (Attrs & Attribute::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
+
+ // If the parameter is passed as a byval argument, then we have to have a
+ // sized type and the sized type has to have the same size as the old type.
+ if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
+ const PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ return false;
+
+ const Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ if (TD->getTypeAllocSize(CurElTy) !=
+ TD->getTypeAllocSize(ParamPTy->getElementType()))
+ return false;
+ }
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
@@ -1109,8 +1064,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
- OldRetTy, false);
+ Instruction::CastOps opcode =
+ CastInst::getCastOpcode(NC, false, OldRetTy, false);
NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
// If this is an invoke instruction, we should insert it after the first
@@ -1119,7 +1074,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
- // Otherwise, it's a call, just insert cast right after the call instr
+ // Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
}
Worklist.AddUsersToWorkList(*Caller);
@@ -1128,7 +1083,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
}
-
if (!Caller->use_empty())
Caller->replaceAllUsesWith(NV);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 79a9b09..b432641 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -462,8 +462,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
- if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
- Src->hasOneUse()) {
+ if (Src->hasOneUse() &&
+ match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
// We have three types to worry about here, the type of A, the source of
// the truncate (MidSize), and the destination of the truncate. We know that
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
@@ -482,6 +482,16 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
+
+ // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
+ // type isn't non-native.
+ if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
+ ShouldChangeType(Src->getType(), CI.getType()) &&
+ match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
+ Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+ return BinaryOperator::CreateAnd(NewTrunc,
+ ConstantExpr::getTrunc(Cst, CI.getType()));
+ }
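As a concrete instance (values chosen for illustration): trunc(X & 0x1F0) to i8 becomes (trunc X to i8) & 0xF0, since the constant is truncated along with the value; and when the truncated constant ends up all-ones (e.g. X & 0x1FF truncated to i8), the resulting 'and' with -1 disappears in a later simplification, leaving just the truncate. The fold is only attempted when the source has a single use and ShouldChangeType accepts the source/destination pair.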
return 0;
}
@@ -1019,8 +1029,22 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
}
}
}
-
-
+
+ // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
+ if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
+ ICmpInst::Predicate Pred; Value *CmpLHS;
+ if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
+ const Type *EltTy = VTy->getElementType();
+
+ // splat the shift constant to a constant vector.
+ Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+ Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
+ return ReplaceInstUsesWith(CI, In);
+ }
+ }
+ }
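The new vector case is the lane-wise identity (x < 0 ? -1 : 0) == (x >> 31) for 32-bit lanes: an arithmetic shift by the bit width minus one smears the sign bit across the whole lane. A minimal scalar sketch (it assumes signed >> is an arithmetic shift, which is what the IR ashr guarantees):

  #include <cassert>
  #include <cstdint>
  int main() {
    const int32_t samples[] = {0, 1, 42, -1, -42, INT32_MIN, INT32_MAX};
    for (int32_t x : samples) {
      int32_t sel = (x < 0) ? -1 : 0;   // sext(icmp slt x, 0) on one lane
      int32_t shr = x >> 31;            // ashr x, 31 on the same lane
      assert(sel == shr);
    }
    return 0;
  }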
+
// If the input is a shl/ashr pair of a same constant, then this is a sign
// extension from a smaller value. If we could trust arbitrary bitwidth
// integers, we could turn this into a truncate to the smaller bit and then
@@ -1363,8 +1387,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
ConstantInt::get(Int32Ty, SrcElts));
}
- Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
- return new ShuffleVectorInst(InVal, V2, Mask);
+ return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
}
static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d7e2b72..999de34 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -22,13 +22,17 @@
using namespace llvm;
using namespace PatternMatch;
+static ConstantInt *getOne(Constant *C) {
+ return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
+}
+
/// AddOne - Add one to a ConstantInt
static Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(ConstantInt *C) {
- return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+static Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
@@ -160,8 +164,8 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt& KnownZero,
Max = KnownOne|UnknownBits;
if (UnknownBits.isNegative()) { // Sign bit is unknown
- Min.set(Min.getBitWidth()-1);
- Max.clear(Max.getBitWidth()-1);
+ Min.setBit(Min.getBitWidth()-1);
+ Max.clearBit(Max.getBitWidth()-1);
}
}
@@ -694,13 +698,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
if (Pred == ICmpInst::ICMP_NE)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
- // If this is an instruction (as opposed to constantexpr) get NUW/NSW info.
- bool isNUW = false, isNSW = false;
- if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) {
- isNUW = Add->hasNoUnsignedWrap();
- isNSW = Add->hasNoSignedWrap();
- }
-
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
 // so the values can never be equal. Similarly for all other "or equals"
// operators.
@@ -709,10 +706,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+2) <u X --> X >u (MAXUINT-2) --> X > 253
// (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
- // If this is an NUW add, then this is always false.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext()));
-
Value *R =
ConstantExpr::getSub(ConstantInt::getAllOnesValue(CI->getType()), CI);
return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
@@ -721,12 +714,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+1) >u X --> X <u (0-1) --> X != 255
// (X+2) >u X --> X <u (0-2) --> X <u 254
// (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0
- if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
- // If this is an NUW add, then this is always true.
- if (isNUW)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext()));
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI));
- }
unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits();
ConstantInt *SMax = ConstantInt::get(X->getContext(),
@@ -738,16 +727,8 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1
// (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126
// (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always false, if negative, this is always true.
- if (isNSW) {
- bool isTrue = CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI));
- }
// (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127
// (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126
@@ -756,13 +737,6 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
// (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126
// (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128
- // If this is an NSW add, then we have two cases: if the constant is
- // positive, then this is always true, if negative, this is always false.
- if (isNSW) {
- bool isTrue = !CI->getValue().isNegative();
- return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue));
- }
-
assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1);
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
@@ -782,7 +756,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
// (x /u C1) <u C2. Simply casting the operands and result won't
// work. :( The if statement below tests that condition and bails
- // if it finds it.
+ // if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
return 0;
@@ -790,9 +764,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
return 0; // The ProdOV computation fails on divide by zero.
if (DivIsSigned && DivRHS->isAllOnesValue())
return 0; // The overflow computation also screws up here
- if (DivRHS->isOne())
- return 0; // Not worth bothering, and eliminates some funny cases
- // with INT_MIN.
+ if (DivRHS->isOne()) {
+ // This eliminates some funny cases with INT_MIN.
+ ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
+ return &ICI;
+ }
// Compute Prod = CI * DivRHS. We are essentially solving an equation
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
@@ -809,6 +785,10 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// Get the ICmp opcode
ICmpInst::Predicate Pred = ICI.getPredicate();
+ /// If the division is known to be exact, then there is no remainder from the
+ /// divide, so the covered range size is 1; otherwise it is the divisor.
+ ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+
// Figure out the interval that is being checked. For example, a comparison
// like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
// Compute this interval based on the constants involved and the signedness of
@@ -818,38 +798,43 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
Constant *LoBound = 0, *HiBound = 0;
-
+
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
LoBound = Prod;
HiOverflow = LoOverflow = ProdOV;
- if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+ if (!HiOverflow) {
+ // If this is not an exact divide, then many values in the range collapse
+ // to the same result value.
+ HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ }
+
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
- LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
- HiBound = DivRHS;
+ LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
+ HiBound = RangeSize;
} else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+ HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
} else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = AddOne(Prod);
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) {
- ConstantInt* DivNeg =
- cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
- }
+ }
}
} else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ if (DivI->isExact())
+ RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (CmpRHSV == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
- LoBound = AddOne(DivRHS);
- HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+ LoBound = AddOne(RangeSize);
+ HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
@@ -859,12 +844,12 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
HiBound = AddOne(Prod);
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
- LoOverflow = AddWithOverflow(LoBound, HiBound, DivRHS, true) ? -1 : 0;
+ LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
} else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = SubWithOverflow(HiBound, Prod, DivRHS, true);
+ HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
}
// Dividing by a negative swaps the condition. LT <-> GT
@@ -883,9 +868,8 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
if (LoOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, HiBound);
- return ReplaceInstUsesWith(ICI,
- InsertRangeTest(X, LoBound, HiBound, DivIsSigned,
- true));
+ return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
+ DivIsSigned, true));
case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow)
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
@@ -908,13 +892,100 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
case ICmpInst::ICMP_SGT:
if (HiOverflow == +1) // High bound greater than input range.
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
- else if (HiOverflow == -1) // High bound less than input range.
+ if (HiOverflow == -1) // High bound less than input range.
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
if (Pred == ICmpInst::ICMP_UGT)
return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
- else
- return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ }
+}
+
+/// FoldICmpShrCst - Handle "icmp(([al]shr X, cst1), cst2)".
+Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
+ ConstantInt *ShAmt) {
+ const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = CmpRHSV.getBitWidth();
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return 0;
+
+ if (!ICI.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
+ return 0;
+
+ // Otherwise, all lshr and all exact ashr's are equivalent to a udiv/sdiv by
+ // a power of 2. Since we already have logic to simplify these, transform
+ // to div and then simplify the resultant comparison.
+ if (Shr->getOpcode() == Instruction::AShr &&
+ !Shr->isExact())
+ return 0;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst =
+ ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp =
+ Shr->getOpcode() == Instruction::AShr ?
+ Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
+ Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+
+ ICI.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (TheDiv == 0)
+ return &ICI;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = CmpRHSV << ShAmtVal;
+ ConstantInt *ShiftedCmpRHS = ConstantInt::get(ICI.getContext(), Comp);
+ if (Shr->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
+ IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (Shr->hasOneUse() && Shr->isExact())
+ return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+
+ if (Shr->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
+
+ Value *And = Builder->CreateAnd(Shr->getOperand(0),
+ Mask, Shr->getName()+".mask");
+ return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
}
+ return 0;
}
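
FoldICmpShrCst leans on the fact that an lshr by a constant is a udiv by the matching power of two (and an exact ashr is an sdiv), so a non-equality compare of the shift can be handed to FoldICmpDivCst. A standalone sketch of the identity, and of why a non-exact ashr is excluded (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch.
int main() {
  // An lshr by a constant is a udiv by the matching power of two.
  for (uint32_t x = 0; x < 4096; ++x)
    for (uint32_t s = 1; s < 12; ++s)
      assert((x >> s) == x / (1u << s));

  // Why ashr must be exact: signed division truncates toward zero, while
  // an arithmetic shift floors.  -7 sdiv 2 is -3, but -7 ashr 1 would be
  // -4, so only an exact ashr (no one bits shifted out) matches an sdiv.
  assert(-7 / 2 == -3);
  return 0;
}
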
@@ -939,8 +1010,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
// Pull in the high bits from known-ones set.
- APInt NewRHS(RHS->getValue());
- NewRHS.zext(SrcBits);
+ APInt NewRHS = RHS->getValue().zext(SrcBits);
NewRHS |= KnownOne;
return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
ConstantInt::get(ICI.getContext(), NewRHS));
@@ -1022,10 +1092,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
(AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
uint32_t BitWidth =
cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
- APInt NewCST = AndCST->getValue();
- NewCST.zext(BitWidth);
- APInt NewCI = RHSV;
- NewCI.zext(BitWidth);
+ APInt NewCST = AndCST->getValue().zext(BitWidth);
+ APInt NewCI = RHSV.zext(BitWidth);
Value *NewAnd =
Builder->CreateAnd(Cast->getOperand(0),
ConstantInt::get(ICI.getContext(), NewCST),
@@ -1145,7 +1213,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
// Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
// -> and (icmp eq P, null), (icmp eq Q, null).
-
Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
Constant::getNullValue(P->getType()));
Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
@@ -1185,6 +1252,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI, Cst);
}
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getLShr(RHS, ShAmt));
+
if (LHSI->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
@@ -1195,8 +1268,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Value *And =
Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And,
- ConstantInt::get(ICI.getContext(),
- RHSV.lshr(ShAmtVal)));
+ ConstantExpr::getLShr(RHS, ShAmt));
}
}
@@ -1205,8 +1277,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse() &&
isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
// (X << 31) <s 0 --> (X&1) != 0
- Constant *Mask = ConstantInt::get(ICI.getContext(), APInt(TypeBits, 1) <<
- (TypeBits-ShAmt->getZExtValue()-1));
+ Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
+ APInt::getOneBitSet(TypeBits,
+ TypeBits-ShAmt->getZExtValue()-1));
Value *And =
Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
@@ -1216,57 +1289,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
- case Instruction::AShr: {
+ case Instruction::AShr:
// Only handle equality comparisons of shift-by-constant.
- ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt || !ICI.isEquality()) break;
-
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- uint32_t TypeBits = RHSV.getBitWidth();
- if (ShAmt->uge(TypeBits))
- break;
-
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- APInt Comp = RHSV << ShAmtVal;
- if (LHSI->getOpcode() == Instruction::LShr)
- Comp = Comp.lshr(ShAmtVal);
- else
- Comp = Comp.ashr(ShAmtVal);
-
- if (Comp != RHSV) { // Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::getInt1Ty(ICI.getContext()),
- IsICMP_NE);
- return ReplaceInstUsesWith(ICI, Cst);
- }
-
- // Otherwise, check to see if the bits shifted out are known to be zero.
- // If so, we can compare against the unshifted value:
- // (X & 4) >> 1 == 2 --> (X & 4) == 4.
- if (LHSI->hasOneUse() &&
- MaskedValueIsZero(LHSI->getOperand(0),
- APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantExpr::getShl(RHS, ShAmt));
- }
-
- if (LHSI->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(ICI.getContext(), Val);
-
- Value *And = Builder->CreateAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantExpr::getShl(RHS, ShAmt));
- }
+ if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+ if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI),
+ ShAmt))
+ return Res;
break;
- }
case Instruction::SDiv:
case Instruction::UDiv:
@@ -1543,50 +1572,174 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// The re-extended constant changed so the constant cannot be represented
// in the shorter type. Consequently, we cannot emit a simple comparison.
+ // All the cases that fold to true or false will have already been handled
+ // by SimplifyICmpInst, so only deal with the tricky case.
- // First, handle some easy cases. We know the result cannot be equal at this
- // point so handle the ICI.isEquality() cases
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext()));
- if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext()));
+ if (isSignedCmp || !isSignedExt)
+ return 0;
// Evaluate the comparison for LT (we invert for GT below). LE and GE cases
// should have been folded away previously and not enter in here.
- Value *Result;
- if (isSignedCmp) {
- // We're performing a signed comparison.
- if (cast<ConstantInt>(CI)->getValue().isNegative())
- Result = ConstantInt::getFalse(ICI.getContext()); // X < (small) --> false
- else
- Result = ConstantInt::getTrue(ICI.getContext()); // X < (large) --> true
- } else {
- // We're performing an unsigned comparison.
- if (isSignedExt) {
- // We're performing an unsigned comp with a sign extended value.
- // This is true if the input is >= 0. [aka >s -1]
- Constant *NegOne = Constant::getAllOnesValue(SrcTy);
- Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
- } else {
- // Unsigned extend & unsigned compare -> always true.
- Result = ConstantInt::getTrue(ICI.getContext());
- }
- }
+
+ // We're performing an unsigned comp with a sign extended value.
+ // This is true if the input is >= 0. [aka >s -1]
+ Constant *NegOne = Constant::getAllOnesValue(SrcTy);
+ Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());
// Finally, return the value computed.
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT ||
- ICI.getPredicate() == ICmpInst::ICMP_SLT)
+ if (ICI.getPredicate() == ICmpInst::ICMP_ULT)
return ReplaceInstUsesWith(ICI, Result);
- assert((ICI.getPredicate()==ICmpInst::ICMP_UGT ||
- ICI.getPredicate()==ICmpInst::ICMP_SGT) &&
- "ICmp should be folded!");
- if (Constant *CI = dyn_cast<Constant>(Result))
- return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
+ assert(ICI.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
return BinaryOperator::CreateNot(Result);
}
+/// ProcessUGT_ADDCST_ADD - The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
+///
+static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // The transformation we're trying to do here is to transform this into an
+ // llvm.sadd.with.overflow. To do this, we have to replace the original add
+ // with a narrower add, and discard the add-with-constant that is part of the
+ // range check (if we can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare can be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse()) return 0;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2()) return 0;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return 0;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
+ UI != E; ++UI) {
+ if (*UI == AddWithCst) continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but which goes downwards the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ if (TI == 0 ||
+ TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Module *M = I.getParent()->getParent()->getParent();
+
+ const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
+ &NewType, 1);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
+ CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.ReplaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
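
The magic constants in ProcessUGT_ADDCST_ADD come from the fact that a signed i8 sum fits exactly when sum+128, viewed unsigned, lands in [0, 255]. A standalone brute-force check of that equivalence (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch: computed in a wider type,
// (a + b) + 128 exceeds 255 (unsigned) exactly when a + b leaves the
// signed i8 range, which is what sadd.with.overflow.i8 reports.
int main() {
  for (int a = -128; a <= 127; ++a)
    for (int b = -128; b <= 127; ++b) {
      uint32_t sum = static_cast<uint32_t>(a + b);      // the wide add
      bool idiom = (sum + 128u) > 255u;                 // "(sum+128) >u 255"
      bool overflows = (a + b < -128) || (a + b > 127);
      assert(idiom == overflows);
    }
  return 0;
}
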
+
+static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
+ InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers, don't do it for
+ // vectors.
+ if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+
+ // If the add is a constant expr, then we don't bother transforming it.
+ Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
+ if (OrigAdd == 0) return 0;
+
+ Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(OrigAdd);
+
+ Module *M = I.getParent()->getParent()->getParent();
+ const Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+ CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0);
+ IC.ReplaceInstUsesWith(*OrigAdd, Add);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+}
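
ProcessUAddIdiom relies on the classic identity that a wrapping unsigned add overflows exactly when the truncated result compares below one of its operands. A standalone check over all i8 pairs (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch: a wrapping unsigned add
// overflows exactly when the truncated sum compares below an operand,
// the idiom ProcessUAddIdiom rewrites to llvm.uadd.with.overflow.
int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      uint8_t sum = static_cast<uint8_t>(a + b);   // wrapping i8 add
      bool idiom = sum < static_cast<uint8_t>(a);  // "(a+b) <u a"
      bool overflows = a + b > 255;
      assert(idiom == overflows);
    }
  return 0;
}
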
+
+// DemandedBitsLHSMask - When performing a comparison against a constant,
+// it is possible that not all the bits in the LHS are demanded. This helper
+// method computes the mask that IS demanded.
+static APInt DemandedBitsLHSMask(ICmpInst &I,
+ unsigned BitWidth, bool isSignCheck) {
+ if (isSignCheck)
+ return APInt::getSignBit(BitWidth);
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ if (!CI) return APInt::getAllOnesValue(BitWidth);
+ const APInt &RHS = CI->getValue();
+
+ switch (I.getPredicate()) {
+ // For a UGT comparison, we don't care about any bits that
+ // correspond to the trailing ones of the comparand. The value of these
+ // bits doesn't impact the outcome of the comparison, because any value
+ // greater than the RHS must differ in a bit higher than these due to carry.
+ case ICmpInst::ICMP_UGT: {
+ unsigned trailingOnes = RHS.countTrailingOnes();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
+ return ~lowBitsSet;
+ }
+
+ // Similarly, for a ULT comparison, we don't care about the trailing zeros.
+ // Any value less than the RHS must differ in a higher bit because of carries.
+ case ICmpInst::ICMP_ULT: {
+ unsigned trailingZeros = RHS.countTrailingZeros();
+ APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
+ return ~lowBitsSet;
+ }
+
+ default:
+ return APInt::getAllOnesValue(BitWidth);
+ }
+
+}
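
The UGT/ULT cases of DemandedBitsLHSMask drop the low bits covered by the comparand's trailing ones (respectively zeros) because carries guarantee that any value on the other side of the compare must differ above them. A standalone brute-force check of the UGT claim at 8 bits (illustrative only, not part of the patch):

#include <cassert>

// Illustrative check, not part of the patch: if the RHS of an unsigned
// greater-than has t trailing one bits, flipping any of the low t bits
// of the LHS never changes the result, so those bits are not demanded.
int main() {
  for (unsigned rhs = 0; rhs <= 255; ++rhs) {
    unsigned t = 0;
    while (t < 8 && ((rhs >> t) & 1)) ++t;                 // count trailing ones
    unsigned lowMask = (t == 8) ? 0xFFu : ((1u << t) - 1);
    for (unsigned x = 0; x <= 255; ++x)
      for (unsigned flip = 0; flip <= lowMask; ++flip)
        assert((x > rhs) == ((x ^ flip) > rhs));
  }
  return 0;
}
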
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
@@ -1649,17 +1802,37 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
unsigned BitWidth = 0;
- if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
- else if (Ty->isIntOrIntVectorTy())
+ if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
-
+ else if (TD) // Pointers require TD info to get their size.
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+
bool isSignBit = false;
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
Value *A = 0, *B = 0;
+ // Match the following pattern, which is a common idiom when writing
+ // overflow-safe integer arithmetic functions. The source performs an
+ // addition in wider type, and explicitly checks for overflow using
+ // comparisons against INT_MIN and INT_MAX. Simplify this by using the
+ // sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate
+ // magic constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
+ return Res;
+ }
+
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
if (I.isEquality() && CI->isZero() &&
match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
@@ -1704,8 +1877,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
if (SimplifyDemandedBits(I.getOperandUse(0),
- isSignBit ? APInt::getSignBit(BitWidth)
- : APInt::getAllOnesValue(BitWidth),
+ DemandedBitsLHSMask(I, BitWidth, isSignBit),
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
@@ -1744,14 +1916,80 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown icmp opcode!");
- case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_EQ: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) == 0" into "x != 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) == 0" into "x != 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
break;
- case ICmpInst::ICMP_NE:
+ }
+ case ICmpInst::ICMP_NE: {
if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+
+ // If all bits are known zero except for one, then we know at most one
+ // bit is set. If the comparison is against zero, then this is a check
+ // to see if *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = 0;
+ ConstantInt *LHSC = 0;
+ if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
+ LHSC->getValue() != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
+ // then turn "((1 << x)&8) != 0" into "x == 3".
+ Value *X = 0;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
+
+ // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
+ // then turn "((8 >>u x)&1) != 0" into "x == 3".
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(),
+ CI->countTrailingZeros()));
+ }
+
break;
+ }
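
The new ICMP_EQ/ICMP_NE logic above turns a single-bit test of a shifted one into a compare of the shift amount, e.g. ((1 << x) & 8) == 0 becomes x != 3. A standalone check of both rewrites (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch: testing the one known
// possibly-set bit of a shifted one is the same as comparing the shift
// amount against the bit index.
int main() {
  for (uint32_t x = 0; x < 32; ++x) {
    assert((((1u << x) & 8u) == 0) == (x != 3));   // ((1 << x) & 8) == 0  <->  x != 3
    assert((((8u >> x) & 1u) == 0) == (x != 3));   // ((8 >>u x) & 1) == 0 <->  x != 3
  }
  return 0;
}
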
case ICmpInst::ICMP_ULT:
if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
@@ -1894,7 +2132,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
+ if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
break;
case Instruction::Select: {
@@ -1995,79 +2233,163 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Instruction *R = visitICmpInstWithCastAndCast(I))
return R;
}
-
- // See if it's the same type of instruction on the left and right.
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
- if (Op0I->getOpcode() == Op1I->getOpcode() && Op0I->hasOneUse() &&
- Op1I->hasOneUse() && Op0I->getOperand(1) == Op1I->getOperand(1)) {
- switch (Op0I->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
- return new ICmpInst(I.getPredicate(), Op0I->getOperand(0),
- Op1I->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
-
- if (CI->getValue().isMaxSignedValue()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- Pred = I.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, Op0I->getOperand(0),
- Op1I->getOperand(0));
- }
+
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (BO0 || BO1) {
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
+
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = 0, *B = 0, *C = 0, *D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Add)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Add)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) &&
+ NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y = (A == C || A == D) ? B : A;
+ Value *Z = (C == A || C == B) ? D : C;
+ return new ICmpInst(Pred, Y, Z);
+ }
+
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = 0; B = 0; C = 0; D = 0;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub)
+ A = BO0->getOperand(0), B = BO0->getOperand(1);
+ if (BO1 && BO1->getOpcode() == Instruction::Sub)
+ C = BO1->getOperand(0), D = BO1->getOperand(1);
+
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
+ BO0->hasOneUse() && BO1->hasOneUse() &&
+ BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
+ }
+
+ if (CI->getValue().isMaxSignedValue()) {
+ ICmpInst::Predicate Pred = I.isSigned()
+ ? I.getUnsignedPredicate()
+ : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0),
+ BO1->getOperand(0));
}
+ }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
break;
- case Instruction::Mul:
- if (!I.isEquality())
- break;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
- // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
- // Mask = -1 >> count-trailing-zeros(Cst).
- if (!CI->isZero() && !CI->isOne()) {
- const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(I.getContext(),
- APInt::getLowBitsSet(AP.getBitWidth(),
- AP.getBitWidth() -
- AP.countTrailingZeros()));
- Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask);
- Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);
- return new ICmpInst(I.getPredicate(), And1, And2);
- }
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() -
+ AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
}
- break;
}
+ break;
}
}
}
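
The NoOp0WrapProblem/NoOp1WrapProblem guards exist because the X+Y vs. X simplifications above are only sound when the add cannot wrap (or when the predicate is an equality). A standalone counterexample with wrapping i8 arithmetic (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative counterexample, not part of the patch: without nuw,
// "icmp ult (x + y), x" is not the same as "icmp ult y, 0".
int main() {
  uint8_t x = 200, y = 100;
  uint8_t sum = static_cast<uint8_t>(x + y);  // wraps to 44
  bool lhs = sum < x;                         // (x+y) <u x  -> true
  bool rhs = false;                           // y <u 0 can never hold
  assert(lhs && !rhs);                        // the unguarded rewrite would be wrong
  return 0;
}
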
- // ~x < ~y --> y < x
{ Value *A, *B;
- if (match(Op0, m_Not(m_Value(A))) &&
- match(Op1, m_Not(m_Value(B))))
- return new ICmpInst(I.getPredicate(), B, A);
+ // ~x < ~y --> y < x
+ // ~x < cst --> ~cst < x
+ if (match(Op0, m_Not(m_Value(A)))) {
+ if (match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+ }
+
+ // (a+b) <u a --> llvm.uadd.with.overflow.
+ // (a+b) <u b --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_ULT &&
+ match(Op0, m_Add(m_Value(A), m_Value(B))) &&
+ (Op1 == A || Op1 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
+ return R;
+
+ // a >u (a+b) --> llvm.uadd.with.overflow.
+ // b >u (a+b) --> llvm.uadd.with.overflow.
+ if (I.getPredicate() == ICmpInst::ICMP_UGT &&
+ match(Op1, m_Add(m_Value(A), m_Value(B))) &&
+ (Op0 == A || Op0 == B))
+ if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
+ return R;
}
if (I.isEquality()) {
Value *A, *B, *C, *D;
-
- // -x == -y --> x == y
- if (match(Op0, m_Neg(m_Value(A))) &&
- match(Op1, m_Neg(m_Value(B))))
- return new ICmpInst(I.getPredicate(), A, B);
-
+
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
@@ -2102,16 +2424,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Constant::getNullValue(A->getType()));
}
- // (A-B) == A -> B == 0
- if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
- // A == (A-B) -> B == 0
- if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))
- return new ICmpInst(I.getPredicate(), B,
- Constant::getNullValue(B->getType()));
-
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_And(m_Value(A), m_Value(B))) &&
@@ -2397,7 +2709,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I, true))
+ if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
break;
case Instruction::SIToFP:
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b68fbc2..78ff734 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -145,7 +145,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
+ getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
TD->getABITypeAlignment(LI.getType());
@@ -165,7 +165,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (LI.isVolatile()) return 0;
// Do really simple store-to-load forwarding and load CSE, to catch cases
- // where there are several consequtive memory accesses to the same location,
+ // where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
@@ -330,7 +330,9 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
- return new StoreInst(NewCast, CastOp);
+ SI.setOperand(0, NewCast);
+ SI.setOperand(1, CastOp);
+ return &SI;
}
/// equivalentAddressValues - Test if A and B will obviously have the same
@@ -414,7 +416,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Attempt to improve the alignment.
if (TD) {
unsigned KnownAlign =
- GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
+ getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
+ TD);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
TD->getABITypeAlignment(Val->getType());
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b3974e8..d1a1fd6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -14,26 +14,22 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
-/// SubOne - Subtract one from a ConstantInt.
-static Constant *SubOne(ConstantInt *C) {
- return ConstantInt::get(C->getContext(), C->getValue()-1);
-}
-
/// MultiplyOverflows - True if the multiply cannot be expressed in an integer
/// of this size.
static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
uint32_t W = C1->getBitWidth();
APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
if (sign) {
- LHSExt.sext(W * 2);
- RHSExt.sext(W * 2);
+ LHSExt = LHSExt.sext(W * 2);
+ RHSExt = RHSExt.sext(W * 2);
} else {
- LHSExt.zext(W * 2);
- RHSExt.zext(W * 2);
+ LHSExt = LHSExt.zext(W * 2);
+ RHSExt = RHSExt.zext(W * 2);
}
APInt MulExt = LHSExt * RHSExt;
@@ -47,62 +43,48 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
}
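
MultiplyOverflows sign- or zero-extends both operands to twice the width and multiplies there, so overflow reduces to "the wide product does not fit back in W bits". A standalone sketch of the same idea at 32 bits (illustrative only, not part of the patch; it uses a 64-bit widening rather than APInt):

#include <cassert>
#include <cstdint>

// Illustrative sketch, not part of the patch: widen, multiply, and check
// whether the product still fits, the same idea MultiplyOverflows
// implements with APInt at an arbitrary bit width.
static bool mulOverflows32(int32_t a, int32_t b) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  return wide < INT32_MIN || wide > INT32_MAX;
}

int main() {
  assert(!mulOverflows32(46340, 46340));   // 2147395600 fits in i32
  assert(mulOverflows32(46341, 46341));    // 2147488281 does not
  assert(mulOverflows32(INT32_MIN, -1));   // -INT32_MIN is unrepresentable
  return 0;
}
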
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // undef * X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
- // Simplify mul instructions with a constant RHS.
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {
-
- // ((X << C1)*C2) == (X * (C2 << C1))
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
- if (SI->getOpcode() == Instruction::Shl)
- if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::CreateMul(SI->getOperand(0),
- ConstantExpr::getShl(CI, ShOp));
-
- if (CI->isZero())
- return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0
- if (CI->equalsInt(1)) // X * 1 == X
- return ReplaceInstUsesWith(I, Op0);
- if (CI->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
-
- const APInt& Val = cast<ConstantInt>(CI)->getValue();
- if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- return BinaryOperator::CreateShl(Op0,
- ConstantInt::get(Op0->getType(), Val.logBase2()));
- }
- } else if (Op1C->getType()->isVectorTy()) {
- if (Op1C->isNullValue())
- return ReplaceInstUsesWith(I, Op1C);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- if (Op1V->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::CreateNeg(Op0, I.getName());
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return ReplaceInstUsesWith(I, V);
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
+ if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
+ return BinaryOperator::CreateNeg(Op0, I.getName());
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+
+ // ((X << C1)*C2) == (X * (C2 << C1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
+ if (SI->getOpcode() == Instruction::Shl)
+ if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
+ return BinaryOperator::CreateMul(SI->getOperand(0),
+ ConstantExpr::getShl(CI, ShOp));
+
+ const APInt &Val = CI->getValue();
+ if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
+ Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
+ if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
+ if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
+ return Shl;
}
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
- if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {
- // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp");
- Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));
- return BinaryOperator::CreateAdd(Add, C1C2);
-
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ { Value *X; ConstantInt *C1;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
+ Value *Add = Builder->CreateMul(X, CI, "tmp");
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
}
-
+ }
+ }
+
+ // Simplify mul instructions with a constant RHS.
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -135,8 +117,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
BO->getOpcode() == Instruction::SDiv)) {
Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
- // If the division is exact, X % Y is zero.
- if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO))
+ // If the division is exact, X % Y is zero, so we end up with X or -X.
+ if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
if (SDiv->isExact()) {
if (Op1BO == Op1C)
return ReplaceInstUsesWith(I, Op0BO);
@@ -194,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
- bool Changed = SimplifyCommutative(I);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Simplify mul instructions with a constant RHS...
@@ -304,28 +286,6 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
}
-/// This function implements the transforms on div instructions that work
-/// regardless of the kind of div instruction it is (udiv, sdiv, or fdiv). It is
-/// used by the visitors to those instructions.
-/// @brief Transforms common to all three div instructions
-Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- // undef / X -> 0 for integer.
- // undef / X -> undef for FP (the undef could be a snan).
- if (isa<UndefValue>(Op0)) {
- if (Op0->getType()->isFPOrFPVectorTy())
- return ReplaceInstUsesWith(I, Op0);
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
- // X / undef -> undef
- if (isa<UndefValue>(Op1))
- return ReplaceInstUsesWith(I, Op1);
-
- return 0;
-}
-
/// This function implements the transforms common to both integer division
/// instructions (udiv and sdiv). It is called by the visitors to those integer
/// division instructions.
@@ -333,31 +293,12 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // (sdiv X, X) --> 1 (udiv X, X) --> 1
- if (Op0 == Op1) {
- if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
- std::vector<Constant*> Elts(Ty->getNumElements(), CI);
- return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
- }
-
- Constant *CI = ConstantInt::get(I.getType(), 1);
- return ReplaceInstUsesWith(I, CI);
- }
-
- if (Instruction *Common = commonDivTransforms(I))
- return Common;
-
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // div X, 1 == X
- if (RHS->equalsInt(1))
- return ReplaceInstUsesWith(I, Op0);
-
// (X / C1) / C2 -> X / (C1*C2)
if (Instruction *LHS = dyn_cast<Instruction>(Op0))
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
@@ -365,9 +306,8 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (MultiplyOverflows(RHS, LHSRHS,
I.getOpcode()==Instruction::SDiv))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- else
- return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- ConstantExpr::getMul(RHS, LHSRHS));
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -380,20 +320,13 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
- // 0 / X == 0, we don't need to preserve faults!
- if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))
- if (LHS->equalsInt(0))
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // It can't be division by zero, hence it must be division by one.
- if (I.getType()->isIntegerTy(1))
- return ReplaceInstUsesWith(I, Op0);
-
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) {
- if (ConstantInt *X = cast_or_null<ConstantInt>(Op1V->getSplatValue()))
- // div X, 1 == X
- if (X->isOne())
- return ReplaceInstUsesWith(I, Op0);
+ // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
+ Value *X = 0, *Z = 0;
+ if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
+ (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1)))))
+ return BinaryOperator::Create(I.getOpcode(), X, Op1);
}
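
The (X - (X rem Y)) / Y fold above works because X - (X % Y) is exactly (X / Y) * Y under truncating division. A standalone check over a small signed range (illustrative only, not part of the patch):

#include <cassert>

// Illustrative check, not part of the patch: under truncating division,
// X - (X % Y) equals (X / Y) * Y, so dividing either by Y gives X / Y.
int main() {
  for (int x = -50; x <= 50; ++x)
    for (int y = 1; y <= 10; ++y) {
      assert((x - x % y) / y == x / y);
      assert(((x / y) * y) / y == x / y);
    }
  return 0;
}
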
return 0;
@@ -402,6 +335,9 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
@@ -410,60 +346,59 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
- if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::CreateLShr(Op0,
+ if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ BinaryOperator *LShr =
+ BinaryOperator::CreateLShr(Op0,
ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ if (I.isExact()) LShr->setIsExact();
+ return LShr;
+ }
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
- Value *IC = Builder->CreateICmpULT( Op0, C);
+ Value *IC = Builder->CreateICmpULT(Op0, C);
return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
ConstantInt::get(I.getType(), 1));
}
}
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
- if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
- if (C1.isPowerOf2()) {
- Value *N = RHSI->getOperand(1);
- const Type *NTy = N->getType();
- if (uint32_t C2 = C1.logBase2())
- N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");
- return BinaryOperator::CreateLShr(Op0, N);
- }
+ { const APInt *CI; Value *N;
+ if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) {
+ if (*CI != 1)
+ N = Builder->CreateAdd(N, ConstantInt::get(I.getType(), CI->logBase2()),
+ "tmp");
+ if (I.isExact())
+ return BinaryOperator::CreateExactLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
}
}
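
The rewritten shl-divisor case uses the identity X udiv (C1 << N) == X >> (N + log2(C1)) when C1 is a power of two. A standalone check with C1 = 8 (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch: dividing by (8 << n) is the
// same as shifting right by n + 3, i.e. X udiv (C1 << N) == X >> (N + log2(C1)).
int main() {
  for (uint32_t x = 0; x < 1024; ++x)
    for (uint32_t n = 0; n + 3 < 32; ++n)
      assert(x / (8u << n) == (x >> (n + 3)));
  return 0;
}
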
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
- if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
- // Compute the shift amounts
- uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
- // Construct the "on true" case of the select
- Constant *TC = ConstantInt::get(Op0->getType(), TSA);
- Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t");
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ // Construct the "on true" case of the select
+ Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
+ I.isExact());
- // Construct the "on false" case of the select
- Constant *FC = ConstantInt::get(Op0->getType(), FSA);
- Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");
-
- // construct the select instruction and return it.
- return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
- }
- }
+ // Construct the "on false" case of the select
+ Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
+ I.isExact());
+
+ // construct the select instruction and return it.
+ return SelectInst::Create(Cond, TSI, FSI);
+ }
+ }
return 0;
}
Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
@@ -473,20 +408,17 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (RHS->isAllOnesValue())
return BinaryOperator::CreateNeg(Op0);
- // sdiv X, C --> ashr X, log2(C)
- if (cast<SDivOperator>(&I)->isExact() &&
- RHS->getValue().isNonNegative() &&
+ // sdiv X, C --> ashr exact X, log2(C)
+ if (I.isExact() && RHS->getValue().isNonNegative() &&
RHS->getValue().isPowerOf2()) {
Value *ShAmt = llvm::ConstantInt::get(RHS->getType(),
RHS->getValue().exactLogBase2());
- return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName());
+ return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
}
// -X/C --> X/-C provided the negation doesn't overflow.
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
- if (isa<Constant>(Sub->getOperand(0)) &&
- cast<Constant>(Sub->getOperand(0))->isNullValue() &&
- Sub->hasNoSignedWrap())
+ if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
return BinaryOperator::CreateSDiv(Sub->getOperand(1),
ConstantExpr::getNeg(RHS));
}
@@ -500,9 +432,8 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
- ConstantInt *ShiftedInt;
- if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) &&
- ShiftedInt->getValue().isPowerOf2()) {
+
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -516,7 +447,12 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
- return commonDivTransforms(I);
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ return 0;
}
/// This function implements the transforms on rem instructions that work
@@ -551,6 +487,10 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
if (Instruction *common = commonRemTransforms(I))
return common;
+ // X % X == 0
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
// 0 % X == 0 for integer, we don't need to preserve faults!
if (Constant *LHS = dyn_cast<Constant>(Op0))
if (LHS->isNullValue())
@@ -588,42 +528,29 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // X urem C^2 -> X and C
- // Check to see if this is an unsigned remainder with an exact power of 2,
- // if so, convert to a bitwise and.
- if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue().isPowerOf2())
- return BinaryOperator::CreateAnd(Op0, SubOne(C));
+ // X urem C^2 -> X and C-1
+ { const APInt *C;
+ if (match(Op1, m_Power2(C)))
+ return BinaryOperator::CreateAnd(Op0,
+ ConstantInt::get(I.getType(), *C-1));
}
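
The power-of-two urem rewrite above is the usual mask identity: for unsigned x, x % 2^k == x & (2^k - 1). A standalone check (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Illustrative check, not part of the patch: x urem 2^k == x & (2^k - 1).
int main() {
  for (uint32_t x = 0; x < 4096; ++x)
    for (uint32_t k = 0; k < 12; ++k)
      assert(x % (1u << k) == (x & ((1u << k) - 1)));
  return 0;
}
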
- if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
- if (RHSI->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(RHSI->getOperand(0))) {
- if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
- Constant *N1 = Constant::getAllOnesValue(I.getType());
- Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");
- return BinaryOperator::CreateAnd(Op0, Add);
- }
- }
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
+ Constant *N1 = Constant::getAllOnesValue(I.getType());
+ Value *Add = Builder->CreateAdd(Op1, N1, "tmp");
+ return BinaryOperator::CreateAnd(Op0, Add);
}
- // urem X, (select Cond, 2^C1, 2^C2) --> select Cond, (and X, C1), (and X, C2)
- // where C1&C2 are powers of two.
- if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2))) {
- // STO == 0 and SFO == 0 handled above.
- if ((STO->getValue().isPowerOf2()) &&
- (SFO->getValue().isPowerOf2())) {
- Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO),
- SI->getName()+".t");
- Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO),
- SI->getName()+".f");
- return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
- }
- }
+ // urem X, (select Cond, 2^C1, 2^C2) -->
+ // select Cond, (and X, C1-1), (and X, C2-1)
+ // when C1&C2 are powers of two.
+ { Value *Cond; const APInt *C1, *C2;
+ if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
+ Value *TrueAnd = Builder->CreateAnd(Op0, *C1-1, Op1->getName()+".t");
+ Value *FalseAnd = Builder->CreateAnd(Op0, *C2-1, Op1->getName()+".f");
+ return SelectInst::Create(Cond, TrueAnd, FalseAnd);
+ }
}
return 0;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index f7fc62f..297a18c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -30,22 +31,37 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
const Type *LHSType = LHSVal->getType();
const Type *RHSType = RHSVal->getType();
+ bool isNUW = false, isNSW = false, isExact = false;
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
if (!I || I->getOpcode() != Opc || !I->hasOneUse() ||
// Verify type of the LHS matches so we don't fold cmp's of different
- // types or GEP's with different index types.
+ // types.
I->getOperand(0)->getType() != LHSType ||
I->getOperand(1)->getType() != RHSType)
return 0;
// If they are CmpInst instructions, check their predicates
- if (Opc == Instruction::ICmp || Opc == Instruction::FCmp)
- if (cast<CmpInst>(I)->getPredicate() !=
- cast<CmpInst>(FirstInst)->getPredicate())
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -96,11 +112,17 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
- CmpInst *CIOp = cast<CmpInst>(FirstInst);
- return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
- LHSVal, RHSVal);
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
+ LHSVal, RHSVal);
+
+ BinaryOperator *BinOp = cast<BinaryOperator>(FirstInst);
+ BinaryOperator *NewBinOp =
+ BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
+ if (isNUW) NewBinOp->setHasNoUnsignedWrap();
+ if (isNSW) NewBinOp->setHasNoSignedWrap();
+ if (isExact) NewBinOp->setIsExact();
+ return NewBinOp;
}
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
@@ -117,6 +139,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
+ bool AllInBounds = true;
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -124,6 +148,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GEP->getNumOperands() != FirstInst->getNumOperands())
return 0;
+ AllInBounds &= GEP->isInBounds();
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
@@ -201,11 +227,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
}
Value *Base = FixedOperands[0];
- return cast<GEPOperator>(FirstInst)->isInBounds() ?
- GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1,
- FixedOperands.end()) :
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, FixedOperands.begin()+1,
FixedOperands.end());
+ if (AllInBounds) NewGEP->setIsInBounds();
+ return NewGEP;
}
@@ -368,6 +394,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// code size and simplifying code.
Constant *ConstantOp = 0;
const Type *CastSrcTy = 0;
+ bool isNUW = false, isNSW = false, isExact = false;
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -384,6 +411,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
+
+ if (OverflowingBinaryOperator *BO =
+ dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
+ isNUW = BO->hasNoUnsignedWrap();
+ isNSW = BO->hasNoSignedWrap();
+ } else if (PossiblyExactOperator *PEO =
+ dyn_cast<PossiblyExactOperator>(FirstInst))
+ isExact = PEO->isExact();
} else {
return 0; // Cannot fold this operation.
}
@@ -399,6 +434,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
+
+ if (isNUW)
+ isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
+ if (isNSW)
+ isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ if (isExact)
+ isExact = cast<PossiblyExactOperator>(I)->isExact();
}
// Okay, they are all the same operation. Create a new PHI node of the
@@ -433,8 +475,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst))
return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
- if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
+ BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (isNUW) BinOp->setHasNoUnsignedWrap();
+ if (isNSW) BinOp->setHasNoSignedWrap();
+ if (isExact) BinOp->setIsExact();
+ return BinOp;
+ }
CmpInst *CIOp = cast<CmpInst>(FirstInst);
return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
@@ -731,8 +778,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// If LCSSA is around, don't mess with Phi nodes
if (MustPreserveLCSSA) return 0;
-
- if (Value *V = PN.hasConstantValue())
+
+ if (Value *V = SimplifyInstruction(&PN, TD))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c44fe9d..97abc76 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -24,14 +24,14 @@ static SelectPatternFlavor
MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
if (SI == 0) return SPF_UNKNOWN;
-
+
ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
if (ICI == 0) return SPF_UNKNOWN;
-
+
LHS = ICI->getOperand(0);
RHS = ICI->getOperand(1);
-
- // (icmp X, Y) ? X : Y
+
+ // (icmp X, Y) ? X : Y
if (SI->getTrueValue() == ICI->getOperand(0) &&
SI->getFalseValue() == ICI->getOperand(1)) {
switch (ICI->getPredicate()) {
@@ -46,8 +46,8 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
case ICmpInst::ICMP_SLE: return SPF_SMIN;
}
}
-
- // (icmp X, Y) ? Y : X
+
+ // (icmp X, Y) ? Y : X
if (SI->getTrueValue() == ICI->getOperand(1) &&
SI->getFalseValue() == ICI->getOperand(0)) {
switch (ICI->getPredicate()) {
@@ -62,9 +62,9 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
case ICmpInst::ICMP_SLE: return SPF_SMAX;
}
}
-
+
// TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
+
return SPF_UNKNOWN;
}
@@ -136,7 +136,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
FI->getOperand(0), SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
- return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
@@ -195,7 +195,10 @@ static bool isSelect01(Constant *C1, Constant *C2) {
ConstantInt *C2I = dyn_cast<ConstantInt>(C2);
if (!C2I)
return false;
- return (C1I->isZero() || C1I->isOne()) && (C2I->isZero() || C2I->isOne());
+ if (!C1I->isZero() && !C2I->isZero()) // One side must be zero.
+ return false;
+ return C1I->isOne() || C1I->isAllOnesValue() ||
+ C2I->isOne() || C2I->isAllOnesValue();
}
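The relaxed isSelect01 rule above still requires one arm to be zero; the other may now be 1 or -1, since either form lowers to a cheap zero/sign extension of the condition rather than a real select. A small standalone C++ sketch of the predicate (illustrative only, not the LLVM code):

    #include <cassert>
    #include <cstdint>

    // Mirror of the relaxed rule: one side must be zero, the other 1 or -1.
    static bool isSelect01Like(int64_t C1, int64_t C2) {
      if (C1 != 0 && C2 != 0)
        return false;                      // one side must be zero
      return C1 == 1 || C1 == -1 || C2 == 1 || C2 == -1;
    }

    int main() {
      assert(isSelect01Like(0, -1));       // newly accepted: 0 / -1
      assert(isSelect01Like(1, 0));        // still accepted: 1 / 0
      assert(!isSelect01Like(1, -1));      // rejected: neither side is zero
      assert(!isSelect01Like(0, 4));       // rejected: other side is not 1 or -1
      return 0;
    }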
/// FoldSelectIntoOp - Try fold the select into one of the operands to
@@ -219,7 +222,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Constant *C = GetSelectFoldableConstant(TVI);
Value *OOp = TVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
+ // between 0, 1 and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C);
InsertNewInstBefore(NewSel, SI);
@@ -248,7 +251,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Constant *C = GetSelectFoldableConstant(FVI);
Value *OOp = FVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
- // between 0 and 1.
+ // between 0, 1 and -1.
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp);
InsertNewInstBefore(NewSel, SI);
@@ -278,52 +281,95 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
Value *FalseVal = SI.getFalseValue();
// Check cases where the comparison is with a constant that
- // can be adjusted to fit the min/max idiom. We may edit ICI in
- // place here, so make sure the select is the only user.
+ // can be adjusted to fit the min/max idiom. We may move or edit ICI
+ // here, so make sure the select is the only user.
if (ICI->hasOneUse())
if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
+ // X < MIN ? T : F --> F
+ if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
+ && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
+ return ReplaceInstUsesWith(SI, FalseVal);
+ // X > MAX ? T : F --> F
+ else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
+ && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
+ return ReplaceInstUsesWith(SI, FalseVal);
switch (Pred) {
default: break;
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: {
- // X < MIN ? T : F --> F
- if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X < C ? X : C-1 --> X > C-1 ? C-1 : X
- Constant *AdjustedRHS =
- ConstantInt::get(CI->getContext(), CI->getValue()-1);
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
- break;
- }
+ case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT: {
- // X > MAX ? T : F --> F
- if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
- return ReplaceInstUsesWith(SI, FalseVal);
+ // These transformations only work for selects over integers.
+ const IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
+ if (!SelectTy)
+ break;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
+ else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
+
// X > C ? X : C+1 --> X < C+1 ? C+1 : X
- Constant *AdjustedRHS =
- ConstantInt::get(CI->getContext(), CI->getValue()+1);
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
- Changed = true;
- }
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
+ ; // Nothing to do here. Values match without any sign/zero extension.
+
+          // Types do not match. Instead of calculating this with mixed types,

+ // promote all to the larger type. This enables scalar evolution to
+ // analyze this expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits()
+ < SelectTy->getBitWidth()) {
+ Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = sextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ sextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = sextRHS;
+ } else if (ICI->isUnsigned()) {
+ Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = zextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ zextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = zextRHS;
+ } else
+ break;
+ } else
+ break;
+ } else
+ break;
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ ICI->setPredicate(Pred);
+ ICI->setOperand(0, CmpLHS);
+ ICI->setOperand(1, CmpRHS);
+ SI.setOperand(1, TrueVal);
+ SI.setOperand(2, FalseVal);
+
+ // Move ICI instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the ICI instruction uses it.
+ ICI->moveBefore(&SI);
+
+ Changed = true;
break;
}
}
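The rewritten compare above is sound because sign/zero extension is monotone, so bumping the constant by one and swapping the predicate still selects the same value after widening. A brute-force spot-check of the i8-to-i32 sext case in plain C++ (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // x >s c ? sext(x) : sext(c+1)  ==  sext(x) <s sext(c+1) ? sext(c+1) : sext(x)
      for (int c = -128; c <= 126; ++c)        // keep c+1 representable in i8
        for (int x = -128; x <= 127; ++x) {
          int32_t X  = (int8_t)x;              // sext x to i32
          int32_t C1 = (int8_t)(c + 1);        // sext (c+1) to i32
          int32_t Before = ((int8_t)x > (int8_t)c) ? X : C1;
          int32_t After  = (X < C1) ? C1 : X;
          assert(Before == After);
        }
      return 0;
    }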
@@ -399,28 +445,28 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
// can always be mapped.
const Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return true;
-
+
// If V is a PHI node defined in the same block as the condition PHI, we can
// map the arguments.
const PHINode *CondPHI = cast<PHINode>(SI.getCondition());
-
+
if (const PHINode *VP = dyn_cast<PHINode>(I))
if (VP->getParent() == CondPHI->getParent())
return true;
-
+
// Otherwise, if the PHI and select are defined in the same block and if V is
// defined in a different block, then we can transform it.
if (SI.getParent() == CondPHI->getParent() &&
I->getParent() != CondPHI->getParent())
return true;
-
+
// Otherwise we have a 'hard' case and we can't tell without doing more
// detailed dominator based analysis, punt.
return false;
}
/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form:
-/// SPF2(SPF1(A, B), C)
+/// SPF2(SPF1(A, B), C)
Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SelectPatternFlavor SPF1,
Value *A, Value *B,
@@ -431,7 +477,7 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
// MIN(MIN(a, b), a) -> MIN(a, b)
if (SPF1 == SPF2)
return ReplaceInstUsesWith(Outer, Inner);
-
+
// MAX(MIN(a, b), a) -> a
// MIN(MAX(a, b), a) -> a
if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) ||
@@ -440,13 +486,82 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
(SPF1 == SPF_UMAX && SPF2 == SPF_UMIN))
return ReplaceInstUsesWith(Outer, C);
}
-
+
// TODO: MIN(MIN(A, 23), 97)
return 0;
}
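For reference, the min/max-of-min/max folds handled here are plain lattice identities; a quick exhaustive check over a small range in plain C++ (illustrative only):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int a = -4; a <= 4; ++a)
        for (int b = -4; b <= 4; ++b) {
          assert(std::min(std::min(a, b), a) == std::min(a, b)); // MIN(MIN(a,b),a) -> MIN(a,b)
          assert(std::max(std::min(a, b), a) == a);              // MAX(MIN(a,b),a) -> a
          assert(std::min(std::max(a, b), a) == a);              // MIN(MAX(a,b),a) -> a
        }
      return 0;
    }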
+/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
+/// both be) and we have an icmp instruction with zero, and we have an 'and'
+/// with the non-constant value and a power of two, we can turn the select
+/// into a shift on the result of the 'and'.
+static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
+ ConstantInt *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1)))
+ if (!C->isZero())
+ return 0;
+ ConstantInt *AndRHS;
+ Value *LHS = IC->getOperand(0);
+ if (LHS->getType() != SI.getType() ||
+ !match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ return 0;
+
+  // If both select arms are non-zero, see if we have a select of the form
+ // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
+ // for 'x ? 2^n : 0' and fix the thing up at the end.
+ ConstantInt *Offset = 0;
+ if (!TrueVal->isZero() && !FalseVal->isZero()) {
+ if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
+ Offset = FalseVal;
+ else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
+ Offset = TrueVal;
+ else
+ return 0;
+
+ // Adjust TrueVal and FalseVal to the offset.
+ TrueVal = ConstantInt::get(Builder->getContext(),
+ TrueVal->getValue() - Offset->getValue());
+ FalseVal = ConstantInt::get(Builder->getContext(),
+ FalseVal->getValue() - Offset->getValue());
+ }
+
+ // Make sure the mask in the 'and' and one of the select arms is a power of 2.
+ if (!AndRHS->getValue().isPowerOf2() ||
+ (!TrueVal->getValue().isPowerOf2() &&
+ !FalseVal->getValue().isPowerOf2()))
+ return 0;
+
+ // Determine which shift is needed to transform result of the 'and' into the
+ // desired result.
+ ConstantInt *ValC = !TrueVal->isZero() ? TrueVal : FalseVal;
+ unsigned ValZeros = ValC->getValue().logBase2();
+ unsigned AndZeros = AndRHS->getValue().logBase2();
+
+ Value *V = LHS;
+ if (ValZeros > AndZeros)
+ V = Builder->CreateShl(V, ValZeros - AndZeros);
+ else if (ValZeros < AndZeros)
+ V = Builder->CreateLShr(V, AndZeros - ValZeros);
+
+ // Okay, now we know that everything is set up, we just don't know whether we
+  // have an icmp_ne or icmp_eq and whether the true or false val is the zero.
+ bool ShouldNotVal = !TrueVal->isZero();
+ ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
+ if (ShouldNotVal)
+ V = Builder->CreateXor(V, ValC);
+
+ // Apply an offset if needed.
+ if (Offset)
+ V = Builder->CreateAdd(V, Offset);
+ return V;
+}
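A concrete instance of the fold implemented by foldSelectICmpAnd above, checked exhaustively over a small range in plain C++ (the particular mask and arm values are made up for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1024; ++x) {
        // (x & 4) == 0 ? 0 : 8   becomes   (x & 4) << 1
        uint32_t Sel = ((x & 4) == 0) ? 0u : 8u;
        assert(Sel == ((x & 4) << 1));

        // (x & 4) == 0 ? 3 : 11  becomes   ((x & 4) << 1) + 3   (common offset 3)
        uint32_t SelOff = ((x & 4) == 0) ? 3u : 11u;
        assert(SelOff == (((x & 4) << 1) + 3));
      }
      return 0;
    }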
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
@@ -478,7 +593,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
"not."+CondVal->getName()), SI);
return BinaryOperator::CreateOr(NotCond, TrueVal);
}
-
+
// select a, b, a -> a&b
// select a, a, b -> a|b
if (CondVal == TrueVal)
@@ -497,7 +612,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select C, -1, 0 -> sext C to int
if (FalseValC->isZero() && TrueValC->isAllOnesValue())
return new SExtInst(CondVal, SI.getType());
-
+
// select C, 0, 1 -> zext !C to int
if (TrueValC->isZero() && FalseValC->getValue() == 1) {
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
@@ -509,32 +624,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return new SExtInst(NotCond, SI.getType());
}
-
- if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
- // If one of the constants is zero (we know they can't both be) and we
- // have an icmp instruction with zero, and we have an 'and' with the
- // non-constant value, eliminate this whole mess. This corresponds to
- // cases like this: ((X & 27) ? 27 : 0)
- if (TrueValC->isZero() || FalseValC->isZero())
- if (IC->isEquality() && isa<ConstantInt>(IC->getOperand(1)) &&
- cast<Constant>(IC->getOperand(1))->isNullValue())
- if (Instruction *ICA = dyn_cast<Instruction>(IC->getOperand(0)))
- if (ICA->getOpcode() == Instruction::And &&
- isa<ConstantInt>(ICA->getOperand(1)) &&
- (ICA->getOperand(1) == TrueValC ||
- ICA->getOperand(1) == FalseValC) &&
- cast<ConstantInt>(ICA->getOperand(1))->getValue().isPowerOf2()) {
- // Okay, now we know that everything is set up, we just don't
- // know whether we have a icmp_ne or icmp_eq and whether the
- // true or false val is the zero.
- bool ShouldNotVal = !TrueValC->isZero();
- ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
- Value *V = ICA;
- if (ShouldNotVal)
- V = Builder->CreateXor(V, ICA->getOperand(1));
- return ReplaceInstUsesWith(SI, V);
- }
- }
+
+ if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder))
+ return ReplaceInstUsesWith(SI, V);
}
// See if we are selecting two values based on a comparison of the two values.
@@ -542,7 +634,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
// Transform (X == Y) ? X : Y -> Y
if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -554,7 +646,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// Transform (X une Y) ? X : Y -> X
if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -569,7 +661,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
// Transform (X == Y) ? Y : X -> X
if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -581,7 +673,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// Transform (X une Y) ? Y : X -> Y
if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
- // This is not safe in general for floating point:
+ // This is not safe in general for floating point:
// consider X== -0, Y== +0.
// It becomes safe if either operand is a nonzero constant.
ConstantFP *CFPt, *CFPf;
@@ -639,6 +731,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NegVal; // Compute -Z
if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) {
NegVal = ConstantExpr::getNeg(C);
+ } else if (SI.getType()->isFloatingPointTy()) {
+ NegVal = InsertNewInstBefore(
+ BinaryOperator::CreateFNeg(SubOp->getOperand(1),
+ "tmp"), SI);
} else {
NegVal = InsertNewInstBefore(
BinaryOperator::CreateNeg(SubOp->getOperand(1),
@@ -654,7 +750,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
NewFalseOp, SI.getName() + ".p");
NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
+ if (SI.getType()->isFloatingPointTy())
+ return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+ else
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
}
}
}
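The floating-point arm added above is valid because subtracting z is the same as adding the negation of z for IEEE doubles, so the fold can use an fneg followed by an fadd in place of the integer neg/add forms. A small plain C++ spot-check (illustrative only):

    #include <cassert>

    int main() {
      const double Xs[] = {0.0, -0.0, 1.5, -3.25, 1e300};
      const double Zs[] = {0.0, -0.0, 2.5, -7.0, 1e-300};
      for (double X : Xs)
        for (double Z : Zs)
          assert(X - Z == X + (-Z));   // holds for these finite values
      return 0;
    }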
@@ -663,7 +762,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SI.getType()->isIntegerTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
-
+
// MAX(MAX(a, b), a) -> MAX(a, b)
// MIN(MIN(a, b), a) -> MIN(a, b)
// MAX(MIN(a, b), a) -> a
@@ -686,13 +785,26 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into a phi node if the condition is a select.
- if (isa<PHINode>(SI.getCondition()))
+ if (isa<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
if (Instruction *NV = FoldOpIntoPhi(SI))
return NV;
+ if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
+ if (TrueSI->getCondition() == CondVal) {
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ }
+ if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
+ if (FalseSI->getCondition() == CondVal) {
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ }
+
if (BinaryOperator::isNot(CondVal)) {
SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));
SI.setOperand(1, FalseVal);
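The two new blocks above collapse a select whose arm is itself a select on the same condition: under the shared condition the inner select always yields its corresponding arm. A plain C++ spot-check of both directions (illustrative only):

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int a = 0; a < 3; ++a)
          for (int b = 0; b < 3; ++b)
            for (int d = 0; d < 3; ++d) {
              // select c, (select c, a, b), d  ==  select c, a, d
              assert((c ? (c ? a : b) : d) == (c ? a : d));
              // select c, a, (select c, b, d)  ==  select c, a, d
              assert((c ? a : (c ? b : d)) == (c ? a : d));
            }
      return 0;
    }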
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 27716b8..a7f8005 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -21,25 +22,6 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // shl X, 0 == X and shr X, 0 == X
- // shl 0, X == 0 and shr 0, X == 0
- if (Op1 == Constant::getNullValue(Op1->getType()) ||
- Op0 == Constant::getNullValue(Op0->getType()))
- return ReplaceInstUsesWith(I, Op0);
-
- if (isa<UndefValue>(Op0)) {
- if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef
- return ReplaceInstUsesWith(I, Op0);
- else // undef << X -> 0, undef >>u X -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
- if (isa<UndefValue>(Op1)) {
- if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X
- return ReplaceInstUsesWith(I, Op0);
- else // X << undef, X >>u undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- }
-
// See if we can fold away this shift.
if (SimplifyDemandedInstructionBits(I))
return &I;
@@ -53,6 +35,20 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
+
+ // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
+ // Because shifts by negative values (which could occur if A were negative)
+ // are undefined.
+ Value *A; const APInt *B;
+ if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
+ // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
+ // demand the sign bit (and many others) here??
+ Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1),
+ Op1->getName());
+ I.setOperand(1, Rem);
+ return &I;
+ }
+
return 0;
}
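The srem rewrite above relies on the usual power-of-two remainder identity: for the non-negative shift amounts that matter (a negative amount is already undefined behavior for the shift), A srem B equals A & (B-1) when B is a power of two. A plain C++ spot-check (illustrative only):

    #include <cassert>

    int main() {
      const int B = 8;                      // power of two
      for (int a = 0; a < 1000; ++a)
        assert((a % B) == (a & (B - 1)));   // non-negative a only
      return 0;
    }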
@@ -81,7 +77,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// if the needed bits are already zero in the input. This allows us to reuse
// the value which means that we don't care if the shift has multiple uses.
// TODO: Handle opposite shift by exact value.
- ConstantInt *CI;
+ ConstantInt *CI = 0;
if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
(!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
if (CI->getZExtValue() == NumBits) {
@@ -131,9 +127,9 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
// profitable unless we know the and'd out bits are already zero.
if (CI->getZExtValue() > NumBits) {
- unsigned HighBits = CI->getZExtValue() - NumBits;
+ unsigned LowBits = TypeWidth - CI->getZExtValue();
if (MaskedValueIsZero(I->getOperand(0),
- APInt::getHighBitsSet(TypeWidth, HighBits)))
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
@@ -157,7 +153,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
if (CI->getZExtValue() > NumBits) {
unsigned LowBits = CI->getZExtValue() - NumBits;
if (MaskedValueIsZero(I->getOperand(0),
- APInt::getLowBitsSet(TypeWidth, LowBits)))
+ APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
return true;
}
@@ -622,16 +618,49 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
- return commonShiftTransforms(I);
+ if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
+ I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+ TD))
+ return ReplaceInstUsesWith(I, V);
+
+ if (Instruction *V = commonShiftTransforms(I))
+ return V;
+
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
+ // If the shifted-out value is known-zero, then this is a NUW shift.
+ if (!I.hasNoUnsignedWrap() &&
+ MaskedValueIsZero(I.getOperand(0),
+ APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
+ I.setHasNoUnsignedWrap();
+ return &I;
+ }
+
+  // If the shifted-out value is all sign bits, this is an NSW shift.
+ if (!I.hasNoSignedWrap() &&
+ ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
+ I.setHasNoSignedWrap();
+ return &I;
+ }
+ }
+
+ return 0;
}
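The nuw inference above says that if the bits 'x << s' would shift out are already known to be zero, the shift cannot wrap. A plain C++ illustration of that property (the width and shift amount are made up):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned S = 3;
      for (uint8_t x = 0; x < 32; ++x) {     // top 3 bits of x are zero
        uint8_t y = (uint8_t)(x << S);
        assert((uint8_t)(y >> S) == x);      // nothing was shifted out, so it reverses
      }
      return 0;
    }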
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
if (Instruction *R = commonShiftTransforms(I))
return R;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1))
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
// ctlz.i32(x)>>5 --> zext(x == 0)
@@ -640,7 +669,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if ((II->getIntrinsicID() == Intrinsic::ctlz ||
II->getIntrinsicID() == Intrinsic::cttz ||
II->getIntrinsicID() == Intrinsic::ctpop) &&
- isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
+ isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
@@ -648,29 +677,37 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
+ }
+
return 0;
}
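Likewise, the exact-flag inference above holds because a logical shift right discards nothing when the low bits are already zero. A plain C++ spot-check (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned S = 2;
      for (uint32_t i = 0; i < 256; ++i) {
        uint32_t x = i << S;                 // low S bits of x are zero
        assert(((x >> S) << S) == x);        // the right shift lost no bits
      }
      return 0;
    }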
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
+ I.isExact(), TD))
+ return ReplaceInstUsesWith(I, V);
+
if (Instruction *R = commonShiftTransforms(I))
return R;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) {
- // ashr int -1, X = -1 (for any arithmetic shift rights of ~0)
- if (CSI->isAllOnesValue())
- return ReplaceInstUsesWith(I, CSI);
- }
-
+
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ unsigned ShAmt = Op1C->getZExtValue();
+
// If the input is a SHL by the same constant (ashr (shl X, C), C), then we
// have a sign-extend idiom.
Value *X;
if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
- // If the input value is known to already be sign extended enough, delete
- // the extension.
- if (ComputeNumSignBits(X) > Op1C->getZExtValue())
+ // If the left shift is just shifting out partial signbits, delete the
+ // extension.
+ if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
return ReplaceInstUsesWith(I, X);
// If the input is an extension from the shifted amount value, e.g.
@@ -685,6 +722,13 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return new SExtInst(ZI->getOperand(0), ZI->getType());
}
}
+
+ // If the shifted-out value is known-zero, then this is an exact shift.
+ if (!I.isExact() &&
+ MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
+ I.setIsExact();
+ return &I;
+ }
}
// See if we can turn a signed shr into an unsigned shr.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index adf7a76..bda8cea 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -34,7 +34,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
if (!OpC) return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
+ Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
if ((~Demanded & OpC->getValue()) == 0)
return false;
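Most of the edits in this file track an APInt API change: the width-changing operations now return a new value and must be assigned back rather than resizing in place, and the single-bit mutators are spelled setBit/clearBit (with clearAllBits for the whole value). A stand-in sketch of the new shape, not the real llvm::APInt:

    #include <cassert>
    #include <cstdint>

    struct TinyAPInt {                           // hypothetical stand-in, not llvm::APInt
      unsigned Width;
      uint64_t Bits;
      TinyAPInt zext(unsigned NewWidth) const {  // value-returning: assign the result back
        return TinyAPInt{NewWidth, Bits};
      }
      void setBit(unsigned i)   { Bits |=  (1ULL << i); }
      void clearBit(unsigned i) { Bits &= ~(1ULL << i); }
    };

    int main() {
      TinyAPInt DemandedMask = {8, 0xF0};
      DemandedMask = DemandedMask.zext(16);      // old code called zext() for its side effect
      DemandedMask.setBit(15);
      DemandedMask.clearBit(4);
      assert(DemandedMask.Width == 16 && DemandedMask.Bits == 0x80E0);
      return 0;
    }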
@@ -121,13 +121,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
if (isa<ConstantPointerNull>(V)) {
// We know all of the bits for a constant!
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = DemandedMask;
return 0;
}
- KnownZero.clear();
- KnownOne.clear();
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
if (DemandedMask == 0) { // Not demanding any bits from V.
if (isa<UndefValue>(V))
return 0;
@@ -388,15 +388,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Trunc: {
unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.zext(truncBf);
- KnownZero.zext(truncBf);
- KnownOne.zext(truncBf);
+ DemandedMask = DemandedMask.zext(truncBf);
+ KnownZero = KnownZero.zext(truncBf);
+ KnownOne = KnownOne.zext(truncBf);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.trunc(BitWidth);
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ DemandedMask = DemandedMask.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
}
@@ -426,15 +426,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
- DemandedMask.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ DemandedMask = DemandedMask.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
return I;
- DemandedMask.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ DemandedMask = DemandedMask.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
@@ -451,17 +451,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded.
if ((NewBits & DemandedMask) != 0)
- InputDemandedBits.set(SrcBitWidth-1);
+ InputDemandedBits.setBit(SrcBitWidth-1);
- InputDemandedBits.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
+ InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
+ KnownZero = KnownZero.trunc(SrcBitWidth);
+ KnownOne = KnownOne.trunc(SrcBitWidth);
if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
KnownZero, KnownOne, Depth+1))
return I;
- InputDemandedBits.zext(BitWidth);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ InputDemandedBits = InputDemandedBits.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -576,8 +576,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
+
+ // If the shift is NUW/NSW, then it does demand the high bits.
+ ShlOperator *IOp = cast<ShlOperator>(I);
+ if (IOp->hasNoSignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ else if (IOp->hasNoUnsignedWrap())
+ DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -592,10 +600,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::LShr:
// For a logical shift right
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Unsigned shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<LShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -627,14 +641,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
+ uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
// If any of the "high bits" are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
- DemandedMaskIn.set(BitWidth-1);
+ DemandedMaskIn.setBit(BitWidth-1);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<AShrOperator>(I)->isExact())
+ DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
KnownZero, KnownOne, Depth+1))
return I;
@@ -793,10 +813,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned i = 0; i != VWidth; ++i)
if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
Elts.push_back(Undef);
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else { // Otherwise, defined.
Elts.push_back(CV->getOperand(i));
}
@@ -879,13 +899,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Otherwise, the element inserted overwrites whatever was there, so the
// input demanded set is simpler than the output set.
APInt DemandedElts2 = DemandedElts;
- DemandedElts2.clear(IdxNo);
+ DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
UndefElts, Depth+1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
- UndefElts.clear(IdxNo);
+ UndefElts.clearBit(IdxNo);
break;
}
case Instruction::ShuffleVector: {
@@ -900,9 +920,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
assert(MaskVal < LHSVWidth * 2 &&
"shufflevector mask index out of range!");
if (MaskVal < LHSVWidth)
- LeftDemanded.set(MaskVal);
+ LeftDemanded.setBit(MaskVal);
else
- RightDemanded.set(MaskVal - LHSVWidth);
+ RightDemanded.setBit(MaskVal - LHSVWidth);
}
}
}
@@ -921,16 +941,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
- UndefElts.set(i);
+ UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
if (UndefElts4[MaskVal]) {
NewUndefElts = true;
- UndefElts.set(i);
+ UndefElts.setBit(i);
}
} else {
if (UndefElts3[MaskVal - LHSVWidth]) {
NewUndefElts = true;
- UndefElts.set(i);
+ UndefElts.setBit(i);
}
}
}
@@ -973,7 +993,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Ratio = VWidth/InVWidth;
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
if (DemandedElts[OutIdx])
- InputDemandedElts.set(OutIdx/Ratio);
+ InputDemandedElts.setBit(OutIdx/Ratio);
}
} else {
// Untested so far.
@@ -985,7 +1005,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Ratio = InVWidth/VWidth;
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
if (DemandedElts[InIdx/Ratio])
- InputDemandedElts.set(InIdx);
+ InputDemandedElts.setBit(InIdx);
}
// div/rem demand all inputs, because they don't want divide by zero.
@@ -1004,7 +1024,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
if (UndefElts2[OutIdx/Ratio])
- UndefElts.set(OutIdx);
+ UndefElts.setBit(OutIdx);
} else if (VWidth < InVWidth) {
llvm_unreachable("Unimp");
// If there are more elements in the source than there are in the result,
@@ -1013,7 +1033,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts = ~0ULL >> (64-VWidth); // Start out all undef.
for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
if (!UndefElts2[InIdx]) // Not undef?
- UndefElts.clear(InIdx/Ratio); // Clear undef bit.
+ UndefElts.clearBit(InIdx/Ratio); // Clear undef bit.
}
break;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a58124d..5caa12d 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
static bool CheapToScalarize(Value *V, bool isConstant) {
- if (isa<ConstantAggregateZero>(V))
+ if (isa<ConstantAggregateZero>(V))
return true;
if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
if (isConstant) return true;
@@ -31,7 +31,7 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
}
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// Insert element gets simplified to the inserted element or is deleted if
// this is a constant idx extract element and it's a constant idx insertelt.
if (I->getOpcode() == Instruction::InsertElement && isConstant &&
@@ -49,26 +49,24 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
(CheapToScalarize(CI->getOperand(0), isConstant) ||
CheapToScalarize(CI->getOperand(1), isConstant)))
return true;
-
+
return false;
}
-/// Read and decode a shufflevector mask.
-///
-/// It turns undef elements into values that are larger than the number of
-/// elements in the input.
-static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
+/// getShuffleMask - Read and decode a shufflevector mask.
+/// Turn undef elements into negative values.
+static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
unsigned NElts = SVI->getType()->getNumElements();
if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 0);
+ return std::vector<int>(NElts, 0);
if (isa<UndefValue>(SVI->getOperand(2)))
- return std::vector<unsigned>(NElts, 2*NElts);
-
- std::vector<unsigned> Result;
+ return std::vector<int>(NElts, -1);
+
+ std::vector<int> Result;
const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
if (isa<UndefValue>(*i))
- Result.push_back(NElts*2); // undef -> 8
+ Result.push_back(-1); // undef
else
Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
return Result;
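With the change above, a decoded shuffle mask uses signed indices and marks an undef lane with -1 (previously an out-of-range unsigned value), so consumers test for a negative entry before deciding whether a lane reads the LHS or the RHS vector. A plain C++ sketch of the lookup pattern (illustrative only):

    #include <cassert>
    #include <vector>

    // Resolve one output lane of a two-input shuffle, mirroring FindScalarElement.
    static int lookupLane(const std::vector<int> &LHS, const std::vector<int> &RHS,
                          int MaskElt, int Undef = -999) {
      if (MaskElt < 0)
        return Undef;                            // -1 marks an undef lane
      if (MaskElt < (int)LHS.size())
        return LHS[MaskElt];                     // indices [0, N) read the LHS
      return RHS[MaskElt - (int)LHS.size()];     // indices [N, 2N) read the RHS
    }

    int main() {
      std::vector<int> LHS = {10, 11}, RHS = {20, 21};
      assert(lookupLane(LHS, RHS, 1) == 11);
      assert(lookupLane(LHS, RHS, 2) == 20);
      assert(lookupLane(LHS, RHS, -1) == -999);
      return 0;
    }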
@@ -83,42 +81,41 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
return UndefValue::get(PTy->getElementType());
-
+
if (isa<UndefValue>(V))
return UndefValue::get(PTy->getElementType());
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(PTy->getElementType());
if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
return CP->getOperand(EltNo);
-
+
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
- if (!isa<ConstantInt>(III->getOperand(2)))
+ if (!isa<ConstantInt>(III->getOperand(2)))
return 0;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
+
// If this is an insert to the element we are looking for, return the
// inserted value.
- if (EltNo == IIElt)
+ if (EltNo == IIElt)
return III->getOperand(1);
-
+
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return FindScalarElement(III->getOperand(0), EltNo);
}
-
+
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
- unsigned InEl = getShuffleMask(SVI)[EltNo];
- if (InEl < LHSWidth)
- return FindScalarElement(SVI->getOperand(0), InEl);
- else if (InEl < LHSWidth*2)
- return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
- else
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ int InEl = getShuffleMask(SVI)[EltNo];
+ if (InEl < 0)
return UndefValue::get(PTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return FindScalarElement(SVI->getOperand(0), InEl);
+ return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
-
+
// Otherwise, we don't know.
return 0;
}
@@ -127,11 +124,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
+
// If vector val is constant 0, replace extract with scalar 0.
if (isa<ConstantAggregateZero>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
+
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
// If vector val is constant with all elements the same, replace EI with
// that element. When the elements are not identical, we cannot replace yet
@@ -139,53 +136,53 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i != C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
- op0 = 0;
+ op0 = 0;
break;
}
if (op0)
return ReplaceInstUsesWith(EI, op0);
}
-
+
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
-
+
// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
+
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
// property.
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
- DemandedMask.set(IndexVal);
+ DemandedMask.setBit(IndexVal);
if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
DemandedMask, UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
-
+
if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
return ReplaceInstUsesWith(EI, Elt);
-
+
// If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (const VectorType *VT =
+ if (const VectorType *VT =
dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
}
-
+
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
// profitable to do so
@@ -193,11 +190,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (I->hasOneUse() &&
CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
- Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
- EI.getName()+".lhs");
+ Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
+ EI.getName()+".lhs");
Value *newEI1 =
- Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
- EI.getName()+".rhs");
+ Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
+ EI.getName()+".rhs");
return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
}
} else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
@@ -215,21 +212,22 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- unsigned SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
Value *Src;
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
-
- if (SrcIdx < LHSWidth)
+ cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+
+ if (SrcIdx < 0)
+ return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ if (SrcIdx < (int)LHSWidth)
Src = SVI->getOperand(0);
- else if (SrcIdx < LHSWidth*2) {
+ else {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
- } else {
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
}
+ const Type *Int32Ty = Type::getInt32Ty(EI.getContext());
return ExtractElementInst::Create(Src,
- ConstantInt::get(Type::getInt32Ty(EI.getContext()),
+ ConstantInt::get(Int32Ty,
SrcIdx, false));
}
}
@@ -239,42 +237,42 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
+/// elements from either LHS or RHS, return the shuffle mask and true.
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
std::vector<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
+
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return true;
}
-
+
if (V == LHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return true;
}
-
+
if (V == RHS) {
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
i+NumElts));
return true;
}
-
+
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
-
+
if (!isa<ConstantInt>(IdxOp))
return false;
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
// Okay, we can handle this if the vector we are inserting into is
// transitively ok.
@@ -282,13 +280,13 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
- }
+ }
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
if (isa<ConstantInt>(EI->getOperand(1)) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
-
+
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
// Okay, we can handle this if the vector we are inserting into is
@@ -296,15 +294,14 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
- Mask[InsertedIdx % NumElts] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx);
} else {
assert(EI->getOperand(0) == RHS);
- Mask[InsertedIdx % NumElts] =
+ Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
ExtractedIdx+NumElts);
-
}
return true;
}
@@ -313,7 +310,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
}
}
// TODO: Handle shufflevector here!
-
+
return false;
}
@@ -322,11 +319,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *&RHS) {
- assert(V->getType()->isVectorTy() &&
+ assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
-
+
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
@@ -338,25 +335,25 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
Value *IdxOp = IEI->getOperand(2);
-
+
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == V->getType()) {
unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == RHS || RHS == 0) {
RHS = EI->getOperand(0);
Value *V = CollectShuffleElements(VecOp, Mask, RHS);
- Mask[InsertedIdx % NumElts] =
- ConstantInt::get(Type::getInt32Ty(V->getContext()),
- NumElts+ExtractedIdx);
+ Mask[InsertedIdx % NumElts] =
+ ConstantInt::get(Type::getInt32Ty(V->getContext()),
+ NumElts+ExtractedIdx);
return V;
}
-
+
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
// Everything but the extracted element is replaced with the RHS.
@@ -367,7 +364,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
}
return V;
}
-
+
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
@@ -376,7 +373,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
}
}
// TODO: Handle shufflevector here!
-
+
// Otherwise, can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
@@ -387,32 +384,32 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
Value *IdxOp = IE.getOperand(2);
-
+
// Inserting an undef or into an undefined place, remove this.
if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
ReplaceInstUsesWith(IE, VecOp);
-
- // If the inserted element was extracted from some other vector, and if the
+
+ // If the inserted element was extracted from some other vector, and if the
// indexes are constant, try to turn this into a shufflevector operation.
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
EI->getOperand(0)->getType() == IE.getType()) {
unsigned NumVectorElts = IE.getType()->getNumElements();
unsigned ExtractedIdx =
- cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
-
+
if (ExtractedIdx >= NumVectorElts) // Out of range extract.
return ReplaceInstUsesWith(IE, VecOp);
-
+
if (InsertedIdx >= NumVectorElts) // Out of range insert.
return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
+
// If we are extracting a value from a vector, then inserting it right
// back into the same place, just use the input vector.
if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
- return ReplaceInstUsesWith(IE, VecOp);
-
+ return ReplaceInstUsesWith(IE, VecOp);
+
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
@@ -421,18 +418,20 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
if (RHS == 0) RHS = UndefValue::get(LHS->getType());
// We now have a shuffle of LHS, RHS, Mask.
- return new ShuffleVectorInst(LHS, RHS,
- ConstantVector::get(Mask));
+ return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
}
}
}
-
+
unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts))
+ if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+ if (V != &IE)
+ return ReplaceInstUsesWith(IE, V);
return &IE;
-
+ }
+
return 0;
}
@@ -440,27 +439,29 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- std::vector<unsigned> Mask = getShuffleMask(&SVI);
-
+ std::vector<int> Mask = getShuffleMask(&SVI);
+
bool MadeChange = false;
-
+
// Undefined shuffle mask -> undefined value.
if (isa<UndefValue>(SVI.getOperand(2)))
return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
-
+
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
-
+
if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
return 0;
-
+
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return ReplaceInstUsesWith(SVI, V);
LHS = SVI.getOperand(0);
RHS = SVI.getOperand(1);
MadeChange = true;
}
-
+
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
@@ -468,16 +469,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// shuffle(undef,undef,mask) -> undef.
return ReplaceInstUsesWith(SVI, LHS);
}
-
+
// Remap any references to RHS to use LHS.
std::vector<Constant*> Elts;
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= 2*e)
+ if (Mask[i] < 0)
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
else {
- if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||
- (Mask[i] < e && isa<UndefValue>(LHS))) {
- Mask[i] = 2*e; // Turn into undef.
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
@@ -493,59 +494,65 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
RHS = SVI.getOperand(1);
MadeChange = true;
}
-
+
// Analyze the shuffle, are the LHS or RHS and identity shuffles?
bool isLHSID = true, isRHSID = true;
-
+
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] >= e*2) continue; // Ignore undef values.
+ if (Mask[i] < 0) continue; // Ignore undef values.
// Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == i);
-
+ isLHSID &= (Mask[i] == (int)i);
+
// Is this an identity shuffle of the RHS value?
isRHSID &= (Mask[i]-e == i);
}
-
+
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
-
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle. Here we are really conservative:
// we are absolutely afraid of producing a shuffle mask not in the input
// program, because the code gen may not be smart enough to turn a merged
// shuffle into two specific shuffles: it may produce worse code. As such,
- // we only merge two shuffles if the result is one of the two input shuffle
- // masks. In this case, merging the shuffles just removes one instruction,
- // which we know is safe. This is good for things like turning:
- // (splat(splat)) -> splat.
+ // we only merge two shuffles if the result is either a splat or one of the
+ // two input shuffle masks. In this case, merging the shuffles just removes
+ // one instruction, which we know is safe. This is good for things like
+ // turning: (splat(splat)) -> splat.
if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
if (isa<UndefValue>(RHS)) {
- std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
-
+ std::vector<int> LHSMask = getShuffleMask(LHSSVI);
+
if (LHSMask.size() == Mask.size()) {
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= e)
- NewMask.push_back(2*e);
+ std::vector<int> NewMask;
+ bool isSplat = true;
+ int SplatElt = -1; // undef
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int MaskElt;
+ if (Mask[i] < 0 || Mask[i] >= (int)e)
+ MaskElt = -1; // undef
else
- NewMask.push_back(LHSMask[Mask[i]]);
-
+ MaskElt = LHSMask[Mask[i]];
+ // Check if this could still be a splat.
+ if (MaskElt >= 0) {
+ if (SplatElt >=0 && SplatElt != MaskElt)
+ isSplat = false;
+ SplatElt = MaskElt;
+ }
+ NewMask.push_back(MaskElt);
+ }
+
// If the result mask is equal to the src shuffle or this
// shuffle mask, do the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->
- getNumElements();
+ if (isSplat || NewMask == LHSMask || NewMask == Mask) {
std::vector<Constant*> Elts;
+ const Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(
- Type::getInt32Ty(SVI.getContext())));
+ if (NewMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
- Elts.push_back(ConstantInt::get(
- Type::getInt32Ty(SVI.getContext()),
- NewMask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
}
}
return new ShuffleVectorInst(LHSSVI->getOperand(0),
@@ -555,7 +562,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
}
}
-
+
return MadeChange ? &SVI : 0;
}
-
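The merge logic above looks through an inner shufflevector by composing the outer mask with the inner mask, and it only commits the merge when the composed mask is a splat or matches one of the input masks. A small standalone C++ sketch of that composition, using -1 for undef lanes as the patch does; the helper names are illustrative only and are not part of the LLVM sources:

#include <cassert>
#include <vector>

// Compose shuffle(shuffle(X, undef, Inner), undef, Outer) into one mask.
// A lane value of -1 means "undef", mirroring the convention in the patch.
static std::vector<int> composeMasks(const std::vector<int> &Inner,
                                     const std::vector<int> &Outer) {
  std::vector<int> Result;
  for (size_t i = 0, e = Outer.size(); i != e; ++i) {
    int Idx = Outer[i];
    if (Idx < 0 || Idx >= (int)Inner.size())
      Result.push_back(-1);          // undef (or RHS) lane stays undef
    else
      Result.push_back(Inner[Idx]);  // read through the inner shuffle
  }
  return Result;
}

// A mask is a splat if every defined lane selects the same source element.
static bool isSplatMask(const std::vector<int> &Mask) {
  int Splat = -1;
  for (size_t i = 0, e = Mask.size(); i != e; ++i) {
    if (Mask[i] < 0) continue;
    if (Splat >= 0 && Splat != Mask[i]) return false;
    Splat = Mask[i];
  }
  return true;
}

int main() {
  // splat(splat): broadcast lane 2, then broadcast lane 0 of that result;
  // the composed mask still reads lane 2 of the original vector.
  int InnerArr[] = {2, 2, 2, 2};
  int OuterArr[] = {0, 0, 0, 0};
  std::vector<int> Inner(InnerArr, InnerArr + 4), Outer(OuterArr, OuterArr + 4);
  std::vector<int> Merged = composeMasks(Inner, Outer);
  assert(isSplatMask(Merged) && Merged[0] == 2);
  return 0;
}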
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e46c679..37123d0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm-c/Initialization.h"
#include <algorithm>
#include <climits>
using namespace llvm;
@@ -57,11 +58,22 @@ STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
+STATISTIC(NumExpand, "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc , "Number of reassociations");
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstCombinerPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstCombine(*unwrap(R));
+}
char InstCombiner::ID = 0;
INITIALIZE_PASS(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false);
+ "Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
@@ -97,53 +109,326 @@ bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
}
-// SimplifyCommutative - This performs a few simplifications for commutative
-// operators:
+/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
+/// operators which are associative or commutative:
+//
+// Commutative operators:
//
// 1. Order operands such that they are listed from right (least complex) to
// left (most complex). This puts constants before unary operators before
// binary operators.
//
-// 2. Transform: (op (op V, C1), C2) ==> (op V, (op C1, C2))
-// 3. Transform: (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
+// Associative operators:
+//
+// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+//
+// Associative and commutative operators:
+//
+// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+// if C1 and C2 are constants.
//
-bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
+bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
+ Instruction::BinaryOps Opcode = I.getOpcode();
bool Changed = false;
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1)))
- Changed = !I.swapOperands();
- if (!I.isAssociative()) return Changed;
-
- Instruction::BinaryOps Opcode = I.getOpcode();
- if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))
- if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {
- if (isa<Constant>(I.getOperand(1))) {
- Constant *Folded = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(I.getOperand(1)),
- cast<Constant>(Op->getOperand(1)));
- I.setOperand(0, Op->getOperand(0));
- I.setOperand(1, Folded);
- return true;
+ do {
+ // Order operands such that they are listed from right (least complex) to
+ // left (most complex). This puts constants before unary operators before
+ // binary operators.
+ if (I.isCommutative() && getComplexity(I.getOperand(0)) <
+ getComplexity(I.getOperand(1)))
+ Changed = !I.swapOperands();
+
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
+
+ if (I.isAssociative()) {
+ // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "B op C" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ // It simplifies to V. Form "A op V".
+ I.setOperand(0, A);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
}
-
- if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1)))
- if (Op1->getOpcode() == Opcode && isa<Constant>(Op1->getOperand(1)) &&
- Op->hasOneUse() && Op1->hasOneUse()) {
- Constant *C1 = cast<Constant>(Op->getOperand(1));
- Constant *C2 = cast<Constant>(Op1->getOperand(1));
-
- // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
- Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
- Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
- Op1->getOperand(0),
- Op1->getName(), &I);
- Worklist.Add(New);
- I.setOperand(0, New);
- I.setOperand(1, Folded);
- return true;
+
+ // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "A op B" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ // It simplifies to V. Form "V op C".
+ I.setOperand(0, V);
+ I.setOperand(1, C);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
}
+ }
}
- return Changed;
+
+ if (I.isAssociative() && I.isCommutative()) {
+ // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = I.getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "V op B".
+ I.setOperand(0, V);
+ I.setOperand(1, B);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
+ if (Op1 && Op1->getOpcode() == Opcode) {
+ Value *A = I.getOperand(0);
+ Value *B = Op1->getOperand(0);
+ Value *C = Op1->getOperand(1);
+
+ // Does "C op A" simplify?
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ // It simplifies to V. Form "B op V".
+ I.setOperand(0, B);
+ I.setOperand(1, V);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ ++NumReassoc;
+ continue;
+ }
+ }
+
+ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
+ // if C1 and C2 are constants.
+ if (Op0 && Op1 &&
+ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
+ isa<Constant>(Op0->getOperand(1)) &&
+ isa<Constant>(Op1->getOperand(1)) &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *A = Op0->getOperand(0);
+ Constant *C1 = cast<Constant>(Op0->getOperand(1));
+ Value *B = Op1->getOperand(0);
+ Constant *C2 = cast<Constant>(Op1->getOperand(1));
+
+ Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
+ Instruction *New = BinaryOperator::Create(Opcode, A, B, Op1->getName(),
+ &I);
+ Worklist.Add(New);
+ I.setOperand(0, New);
+ I.setOperand(1, Folded);
+ // Conservatively clear the optional flags, since they may not be
+ // preserved by the reassociation.
+ I.clearSubclassOptionalData();
+ Changed = true;
+ continue;
+ }
+ }
+
+ // No further simplifications.
+ return Changed;
+ } while (1);
+}
+
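A plain-integer sketch of transform (2) above, assuming ordinary 32-bit addition stands in for an associative IR operator: when "B op C" folds to a constant, "(A op B) op C" can be rewritten as "A op (B op C)" and one runtime operation disappears. The helper name is illustrative only:

#include <cassert>

// "(A op B) op C" ==> "A op (B op C)" when "B op C" simplifies; here the
// simplification is constant folding of two integer constants.
static int addReassociated(int A, int B, int C) {
  int Folded = B + C;   // "B op C" folds when B and C are constants
  return A + Folded;    // form "A op V"
}

int main() {
  for (int X = -1000; X <= 1000; ++X)
    assert((X + 3) + 5 == addReassociated(X, 3, 5));
  return 0;
}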
+/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ switch (LOp) {
+ default:
+ return false;
+
+ case Instruction::And:
+ // And distributes over Or and Xor.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+
+ case Instruction::Mul:
+ // Multiplication distributes over addition and subtraction.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ }
+
+ case Instruction::Or:
+ // Or distributes over And.
+ switch (ROp) {
+ default:
+ return false;
+ case Instruction::And:
+ return true;
+ }
+ }
+}
+
+/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to
+/// "(X ROp Z) LOp (Y ROp Z)".
+static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+ Instruction::BinaryOps ROp) {
+ if (Instruction::isCommutative(ROp))
+ return LeftDistributesOverRight(ROp, LOp);
+ // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
+ // but this requires knowing that the addition does not overflow and other
+ // such subtleties.
+ return false;
+}
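These predicates only claim identities that hold for wrap-around integer arithmetic. The following standalone check exercises each claimed pair exhaustively on 8-bit operands; it is a sketch for illustration, not code from the pass:

#include <cassert>
#include <stdint.h>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned Z = 0; Z < 256; ++Z) {
        uint8_t x = (uint8_t)X, y = (uint8_t)Y, z = (uint8_t)Z;
        // And distributes over Or and Xor.
        assert((uint8_t)(x & (y | z)) == (uint8_t)((x & y) | (x & z)));
        assert((uint8_t)(x & (y ^ z)) == (uint8_t)((x & y) ^ (x & z)));
        // Mul distributes over Add and Sub (modulo 2^8).
        assert((uint8_t)(x * (uint8_t)(y + z)) == (uint8_t)(x * y + x * z));
        assert((uint8_t)(x * (uint8_t)(y - z)) == (uint8_t)(x * y - x * z));
        // Or distributes over And.
        assert((uint8_t)(x | (y & z)) == (uint8_t)((x | y) & (x | z)));
      }
  return 0;
}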
+
+/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
+/// which some other binary operation distributes over either by factorizing
+/// out common terms (e.g. "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
+/// results in simplifications (e.g. "A & (B | C) -> (A&B) | (A&C)" if this is
+/// a win). Returns the simplified value, or null if it didn't simplify.
+Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+ BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
+
+ // Factorization.
+ if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, A, V);
+ V->takeName(&I);
+ return V;
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && Op0->hasOneUse() && Op1->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
+ if (V) {
+ ++NumFactor;
+ V = Builder->CreateBinOp(InnerOpcode, V, B);
+ V->takeName(&I);
+ return V;
+ }
+ }
+ }
+
+ // Expansion.
+ if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+ // The instruction has the form "(A op' B) op C". See if expanding it out
+ // to "(A op C) op' (B op C)" results in simplifications.
+ Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+ Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+
+ // Do "A op C" and "B op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+ if ((L == A && R == B) ||
+ (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
+ return Op0;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ C = Builder->CreateBinOp(InnerOpcode, L, R);
+ C->takeName(&I);
+ return C;
+ }
+ }
+
+ if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+ // The instruction has the form "A op (B op' C)". See if expanding it out
+ // to "(A op B) op' (A op C)" results in simplifications.
+ Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+ Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
+
+ // Do "A op B" and "A op C" both simplify?
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ // They do! Return "L op' R".
+ ++NumExpand;
+ // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+ if ((L == B && R == C) ||
+ (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
+ return Op1;
+ // Otherwise return "L op' R" if it simplifies.
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ return V;
+ // Otherwise, create a new instruction.
+ A = Builder->CreateBinOp(InnerOpcode, L, R);
+ A->takeName(&I);
+ return A;
+ }
+ }
+
+ return 0;
}
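As a concrete instance of the factorization path above: in "(A*7) + (A*-7)" the inner operands are constants whose sum folds to zero, so the whole expression becomes "A*0" and vanishes. A small sketch using ordinary unsigned 32-bit wrap-around arithmetic in place of IR values:

#include <cassert>
#include <stdint.h>

int main() {
  for (uint32_t A = 0; A < 4096; ++A) {
    uint32_t Original = A * 7u + A * (uint32_t)-7;  // "(A op' B) op (C op' D)", A == C
    uint32_t Factored = A * (7u + (uint32_t)-7);    // "A op' (B op D)" == A * 0
    assert(Original == Factored && Factored == 0u);
  }
  return 0;
}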
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -185,8 +470,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
- if (CastInst *CI = dyn_cast<CastInst>(&I))
+ if (CastInst *CI = dyn_cast<CastInst>(&I)) {
return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
+ }
// Figure out if the constant is the left or the right argument.
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
@@ -228,11 +514,24 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Bool selects with constant operands can be folded to logical ops.
if (SI->getType()->isIntegerTy(1)) return 0;
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
+ const VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ const VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return 0;
+ }
+
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
- return SelectInst::Create(SI->getCondition(), SelectTrueVal,
- SelectFalseVal);
+ return SelectInst::Create(SI->getCondition(),
+ SelectTrueVal, SelectFalseVal);
}
return 0;
}
@@ -242,20 +541,25 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
///
-/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms
-/// that would normally be unprofitable because they strongly encourage jump
-/// threading.
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
- bool AllowAggressive) {
- AllowAggressive = false;
+Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
- if (NumPHIValues == 0 ||
- // We normally only transform phis with a single use, unless we're trying
- // hard to make jump threading happen.
- (!PN->hasOneUse() && !AllowAggressive))
+ if (NumPHIValues == 0)
return 0;
+ // We normally only transform phis with a single use. However, if a PHI has
+ // multiple uses and they are all the same operation, we can fold *all* of the
+ // uses into the PHI.
+ if (!PN->hasOneUse()) {
+ // Walk the use list for the instruction, comparing them to I.
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (User != &I && !I.isIdenticalTo(User))
+ return 0;
+ }
+ // Otherwise, we can replace *all* users with the new PHI we form.
+ }
// Check to see if all of the operands of the PHI are simple constants
// (constantint/constantfp/undef). If there is one non-constant value,
@@ -263,24 +567,34 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
// bail out. We don't do arbitrary constant expressions here because moving
// their computation can be expensive without a cost model.
BasicBlock *NonConstBB = 0;
- for (unsigned i = 0; i != NumPHIValues; ++i)
- if (!isa<Constant>(PN->getIncomingValue(i)) ||
- isa<ConstantExpr>(PN->getIncomingValue(i))) {
- if (NonConstBB) return 0; // More than one non-const value.
- if (isa<PHINode>(PN->getIncomingValue(i))) return 0; // Itself a phi.
- NonConstBB = PN->getIncomingBlock(i);
-
- // If the incoming non-constant value is in I's block, we have an infinite
- // loop.
- if (NonConstBB == I.getParent())
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
+ continue;
+
+ if (isa<PHINode>(InVal)) return 0; // Itself a phi.
+ if (NonConstBB) return 0; // More than one non-const value.
+
+ NonConstBB = PN->getIncomingBlock(i);
+
+ // If the InVal is an invoke at the end of the pred block, then we can't
+ // insert a computation after it without breaking the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == NonConstBB)
return 0;
- }
+
+ // If the incoming non-constant value is in I's block, we will remove one
+ // instruction, but insert another equivalent one, leading to infinite
+ // instcombine.
+ if (NonConstBB == I.getParent())
+ return 0;
+ }
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop).  Only
// do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB != 0 && !AllowAggressive) {
+ if (NonConstBB != 0) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
if (!BI || !BI->isUnconditional()) return 0;
}
@@ -290,7 +604,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
NewPN->reserveOperandSpace(PN->getNumOperands()/2);
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
-
+
+ // If we are going to have to insert a new computation, do so right before the
+  // predecessor's terminator.
+ if (NonConstBB)
+ Builder->SetInsertPoint(NonConstBB->getTerminator());
+
// Next, add all of the operands to the PHI.
if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
// We only currently try to fold the condition of a select when it is a phi,
@@ -303,42 +622,36 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred,
- FalseVInPred,
- "phitmp", NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
+ else
+ InV = Builder->CreateSelect(PN->getIncomingValue(i),
+ TrueVInPred, FalseVInPred, "phitmp");
NewPN->addIncoming(InV, ThisBB);
}
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
+ Constant *C = cast<Constant>(I.getOperand(1));
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ Value *InV = 0;
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
+ else if (isa<ICmpInst>(CI))
+ InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ else
+ InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i),
+ C, "phitmp");
+ NewPN->addIncoming(InV, PN->getIncomingBlock(i));
+ }
} else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
- if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
- else
- InV = ConstantExpr::get(I.getOpcode(), InC, C);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- InV = BinaryOperator::Create(BO->getOpcode(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(),
- PN->getIncomingValue(i), C, "phitmp",
- NonConstBB->getTerminator());
- else
- llvm_unreachable("Unknown binop!");
-
- Worklist.Add(cast<Instruction>(InV));
- }
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+ InV = ConstantExpr::get(I.getOpcode(), InC, C);
+ else
+ InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
+ PN->getIncomingValue(i), C, "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else {
@@ -346,18 +659,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I,
const Type *RetTy = CI->getType();
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InV;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
+ if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
- } else {
- assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
- I.getType(), "phitmp",
- NonConstBB->getTerminator());
- Worklist.Add(cast<Instruction>(InV));
- }
+ else
+ InV = Builder->CreateCast(CI->getOpcode(),
+ PN->getIncomingValue(i), I.getType(), "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
}
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ) {
+ Instruction *User = cast<Instruction>(*UI++);
+ if (User == &I) continue;
+ ReplaceInstUsesWith(*User, NewPN);
+ EraseInstFromFunction(*User);
+ }
return ReplaceInstUsesWith(I, NewPN);
}
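For the simple all-constant case handled above, folding an operation into a phi just applies the operation to each incoming constant. A toy model of that, with a map from predecessor label to constant standing in for a PHINode; the labels and values are illustrative only:

#include <cassert>
#include <map>
#include <string>

int main() {
  // Model of: %r = add (phi [1, %bb.true], [2, %bb.false]), 10
  std::map<std::string, int> Phi;
  Phi["bb.true"] = 1;
  Phi["bb.false"] = 2;
  const int K = 10;  // the constant second operand of the add

  // Folding the add into the phi yields: phi [11, %bb.true], [12, %bb.false]
  std::map<std::string, int> NewPhi;
  for (std::map<std::string, int>::const_iterator I = Phi.begin(),
       E = Phi.end(); I != E; ++I)
    NewPhi[I->first] = I->second + K;

  assert(NewPhi["bb.true"] == 11 && NewPhi["bb.false"] == 12);
  return 0;
}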
@@ -432,28 +749,35 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
- if (isa<UndefValue>(GEP.getOperand(0)))
- return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));
-
- // Eliminate unneeded casts for indices.
+ // Eliminate unneeded casts for indices, and replace indices which displace
+ // by multiples of a zero size type with zero.
if (TD) {
bool MadeChange = false;
- unsigned PtrSize = TD->getPointerSizeInBits();
-
+ const Type *IntPtrTy = TD->getIntPtrType(GEP.getContext());
+
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
I != E; ++I, ++GTI) {
- if (!isa<SequentialType>(*GTI)) continue;
-
- // If we are using a wider index than needed for this platform, shrink it
- // to what we need. If narrower, sign-extend it to what we need. This
- // explicit cast can make subsequent optimizations more obvious.
- unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth();
- if (OpBits == PtrSize)
- continue;
-
- *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true);
- MadeChange = true;
+ // Skip indices into struct types.
+ const SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy) continue;
+
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
+ MadeChange = true;
+ }
+
+ if ((*I)->getType() != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
+ }
}
if (MadeChange) return &GEP;
}
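The zero-size canonicalization above relies on the fact that an index over an element type with allocation size zero contributes index * 0 == 0 bytes to the final address, so it can always be replaced by the constant zero. A tiny address-arithmetic sketch, purely for illustration:

#include <cassert>
#include <stdint.h>

int main() {
  const uint64_t ElemSize = 0;     // alloc size of the indexed element type
  const uint64_t Base = 0x1000;    // some base address
  for (int64_t Index = -8; Index <= 8; ++Index) {
    uint64_t Offset = (uint64_t)Index * ElemSize;  // always 0
    assert(Base + Offset == Base);                 // same as using index 0
  }
  return 0;
}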
@@ -940,6 +1264,14 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
EraseInstFromFunction(*II);
return BinaryOperator::CreateAdd(LHS, RHS);
}
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
+ return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
+ ConstantExpr::getNot(CI));
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -964,10 +1296,37 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
}
}
}
- // Can't simplify extracts from other values. Note that nested extracts are
- // already simplified implicitely by the above (extract ( extract (insert) )
+ if (LoadInst *L = dyn_cast<LoadInst>(Agg))
+ // If the (non-volatile) load only has one use, we can rewrite this to a
+ // load from a GEP. This reduces the size of the load.
+ // FIXME: If a load is used only by extractvalue instructions then this
+ // could be done regardless of having multiple uses.
+ if (!L->isVolatile() && L->hasOneUse()) {
+ // extractvalue has integer indices, getelementptr has Value*s. Convert.
+ SmallVector<Value*, 4> Indices;
+ // Prefix an i32 0 since we need the first element.
+ Indices.push_back(Builder->getInt32(0));
+ for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end();
+ I != E; ++I)
+ Indices.push_back(Builder->getInt32(*I));
+
+ // We need to insert these at the location of the old load, not at that of
+ // the extractvalue.
+ Builder->SetInsertPoint(L->getParent(), L);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(),
+ Indices.begin(), Indices.end());
+ // Returning the load directly will cause the main loop to insert it in
+ // the wrong spot, so use ReplaceInstUsesWith().
+ return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
+ }
+ // We could simplify extracts from other values. Note that nested extracts may
+ // already be simplified implicitly by the above: extract (extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
- // the value inserted, if appropriate).
+ // the value inserted, if appropriate. Similarly for extracts from single-use
+ // loads: extract (extract (load)) will be translated to extract (load (gep))
+ // and if again single-use then via load (gep (gep)) to load (gep).
+ // However, double extracts from e.g. function arguments or return values
+ // aren't handled yet.
return 0;
}
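The range-comparison rewrite above uses the identity that unsigned "a + C" overflows exactly when a > ~C (i.e. a > UINT_MAX - C), which is what the ConstantExpr::getNot call produces. An exhaustive 8-bit check of that identity, as a standalone sketch:

#include <cassert>
#include <stdint.h>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned C = 0; C < 256; ++C) {
      bool Overflows = (A + C) > 255;                       // 8-bit unsigned overflow
      bool RangeCheck = (uint8_t)A > (uint8_t)~(uint8_t)C;  // a > ~C
      assert(Overflows == RangeCheck);
    }
  return 0;
}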
@@ -1023,10 +1382,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
-
- std::vector<Instruction*> InstrsForInstCombineWorklist;
- InstrsForInstCombineWorklist.reserve(128);
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
do {
@@ -1231,6 +1588,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
DEBUG(errs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
+ Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index a77d70c..1d31fcc 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "insert-edge-profiling"
+
#include "ProfilingUtils.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -34,7 +35,9 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(ID) {}
+ EdgeProfiler() : ModulePass(ID) {
+ initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
virtual const char *getPassName() const {
return "Edge Profiler";
@@ -44,7 +47,7 @@ namespace {
char EdgeProfiler::ID = 0;
INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false);
+ "Insert instrumentation for edge profiling", false, false)
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
@@ -98,7 +101,7 @@ bool EdgeProfiler::runOnModule(Module &M) {
// otherwise insert it in the successor block.
if (TI->getNumSuccessors() == 1) {
// Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters);
+ IncrementCounterInBlock(BB, i++, Counters, false);
} else {
// Insert counter at the start of the block
IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 0000000..96ed4fa
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,32 @@
+//===-- Instrumentation.cpp - Instrumentation Infrastructure --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeInstrumentation - Initialize all passes in the Instrumentation
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeEdgeProfilerPass(Registry);
+ initializeOptimalEdgeProfilerPass(Registry);
+ initializePathProfilerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 8eec987..c85a1a9 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -36,7 +36,9 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(ID) {}
+ OptimalEdgeProfiler() : ModulePass(ID) {
+ initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +52,14 @@ namespace {
}
char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
"Insert optimal instrumentation for edge profiling",
- false, false);
+ false, false)
ModulePass *llvm::createOptimalEdgeProfilerPass() {
return new OptimalEdgeProfiler();
@@ -125,11 +132,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
// Calculate a Maximum Spanning Tree with the edge weights determined by
  // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
  // edges (0,entry) and (BB,0) (for blocks with no successors) and these
- // edges also participate in the maximum spanning tree calculation.
+ // edges also participate in the maximum spanning tree calculation.
// The third parameter of MaximumSpanningTree() has the effect that not the
// actual MST is returned but the edges _not_ in the MST.
- ProfileInfo::EdgeWeights ECs =
+ ProfileInfo::EdgeWeights ECs =
getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
MaximumSpanningTree<BasicBlock> MST (EdgeVector);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
new file mode 100644
index 0000000..6449b39
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -0,0 +1,1423 @@
+//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
+// profiling converts the CFG into a DAG by replacing backedges with edges
+// from entry to the start block and from the end block to exit. The paths
+// along the new DAG are enumerated, i.e. each path is given a path number.
+// Edges are instrumented to increment the path number register, such that the
+// path number register will equal the path number of the path taken at the
+// exit.
+//
+// This file defines classes for building a CFG for use with different stages
+// in the Ball-Larus path profiling instrumentation [Ball96]. The
+// requirements are formatting the llvm CFG into the Ball-Larus DAG, path
+// numbering, finding a spanning tree, moving increments from the spanning
+// tree to chords.
+//
+// Terms:
+// DAG - Directed Acyclic Graph.
+// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
+// removed in the following manner. For every backedge
+// v->w, insert edge ENTRY->w and edge v->EXIT.
+// Path Number - The number corresponding to a specific path through a
+// Ball-Larus DAG.
+// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
+// vertices and is a tree.
+// Chord - An edge not in the spanning tree.
+//
+// [Ball96]
+// T. Ball and J. R. Larus. "Efficient Path Profiling."
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+// [Ball94]
+// Thomas Ball. "Efficiently Counting Program Events with Support for
+// On-line queries."
+// ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
+// September 1994, Pages 1399-1410.
+//===----------------------------------------------------------------------===//
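A self-contained illustration of the numbering scheme described above, on the smallest interesting DAG (a diamond with two entry-to-exit paths): NumPaths(exit) = 1, NumPaths(v) is the sum over successors, and each edge is given the sum of the path counts of the successors ordered before its target, so that summing the increments along any path yields a unique number in [0, NumPaths(entry)). This sketch is for illustration only and is not code from the pass:

#include <cassert>
#include <vector>

int main() {
  // Diamond DAG: Entry -> {Left, Right}, Left -> Exit, Right -> Exit.
  enum { Entry = 0, Left = 1, Right = 2, Exit = 3, NumNodes = 4 };
  std::vector<std::vector<int> > Succ(NumNodes);
  Succ[Entry].push_back(Left);
  Succ[Entry].push_back(Right);
  Succ[Left].push_back(Exit);
  Succ[Right].push_back(Exit);

  // Paths from each node to Exit, computed in reverse topological order.
  long NumPaths[NumNodes] = {0, 0, 0, 0};
  NumPaths[Exit] = 1;
  int Order[] = {Right, Left, Entry};
  for (int i = 0; i < 3; ++i)
    for (size_t s = 0; s < Succ[Order[i]].size(); ++s)
      NumPaths[Order[i]] += NumPaths[Succ[Order[i]][s]];
  assert(NumPaths[Entry] == 2);

  // Edge increments: Entry->Left gets 0, Entry->Right gets NumPaths[Left] = 1,
  // and the straight-line edges get 0.  Path Entry->Left->Exit sums to 0 and
  // Entry->Right->Exit sums to 1, so each path receives a distinct number.
  long IncrEntryLeft = 0, IncrEntryRight = NumPaths[Left];
  assert(IncrEntryLeft == 0 && IncrEntryRight == 1);
  return 0;
}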
+#define DEBUG_TYPE "insert-path-profiling"
+
+#include "llvm/DerivedTypes.h"
+#include "ProfilingUtils.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <map>
+#include <vector>
+
+#define HASH_THRESHHOLD 100000
+
+using namespace llvm;
+
+namespace {
+class BLInstrumentationNode;
+class BLInstrumentationEdge;
+class BLInstrumentationDag;
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationNode extends BallLarusNode with members used by the
+// instrumentation algorithms.
+// ---------------------------------------------------------------------------
+class BLInstrumentationNode : public BallLarusNode {
+public:
+ // Creates a new BLInstrumentationNode from a BasicBlock.
+ BLInstrumentationNode(BasicBlock* BB);
+
+ // Get/sets the Value corresponding to the pathNumber register,
+ // constant or phinode. Used by the instrumentation code to remember
+ // path number Values.
+ Value* getStartingPathNumber();
+ void setStartingPathNumber(Value* pathNumber);
+
+ Value* getEndingPathNumber();
+ void setEndingPathNumber(Value* pathNumber);
+
+ // Get/set the PHINode Instruction for this node.
+ PHINode* getPathPHI();
+ void setPathPHI(PHINode* pathPHI);
+
+private:
+
+ Value* _startingPathNumber; // The Value for the current pathNumber.
+ Value* _endingPathNumber; // The Value for the current pathNumber.
+ PHINode* _pathPHI; // The PHINode for current pathNumber.
+};
+
+// --------------------------------------------------------------------------
+// BLInstrumentationEdge extends BallLarusEdge with data about the
+// instrumentation that will end up on each edge.
+// --------------------------------------------------------------------------
+class BLInstrumentationEdge : public BallLarusEdge {
+public:
+ BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target);
+
+ // Sets the target node of this edge. Required to split edges.
+ void setTarget(BallLarusNode* node);
+
+ // Get/set whether edge is in the spanning tree.
+ bool isInSpanningTree() const;
+ void setIsInSpanningTree(bool isInSpanningTree);
+
+  // Get/set whether this edge will be instrumented with a path number
+ // initialization.
+ bool isInitialization() const;
+ void setIsInitialization(bool isInitialization);
+
+ // Get/set whether this edge will be instrumented with a path counter
+ // increment. Notice this is incrementing the path counter
+ // corresponding to the path number register. The path number
+ // increment is determined by getIncrement().
+ bool isCounterIncrement() const;
+ void setIsCounterIncrement(bool isCounterIncrement);
+
+ // Get/set the path number increment that this edge will be instrumented
+ // with. This is distinct from the path counter increment and the
+ // weight. The counter increment counts the number of executions of
+ // some path, whereas the path number keeps track of which path number
+ // the program is on.
+ long getIncrement() const;
+ void setIncrement(long increment);
+
+ // Get/set whether the edge has been instrumented.
+ bool hasInstrumentation();
+ void setHasInstrumentation(bool hasInstrumentation);
+
+ // Returns the successor number of this edge in the source.
+ unsigned getSuccessorNumber();
+
+private:
+ // The increment that the code will be instrumented with.
+ long long _increment;
+
+ // Whether this edge is in the spanning tree.
+ bool _isInSpanningTree;
+
+  // Whether this edge is an initialization of the path number.
+ bool _isInitialization;
+
+ // Whether this edge is a path counter increment.
+ bool _isCounterIncrement;
+
+ // Whether this edge has been instrumented.
+ bool _hasInstrumentation;
+};
+
+// ---------------------------------------------------------------------------
+// BLInstrumentationDag extends BallLarusDag with algorithms that
+// determine where instrumentation should be placed.
+// ---------------------------------------------------------------------------
+class BLInstrumentationDag : public BallLarusDag {
+public:
+ BLInstrumentationDag(Function &F);
+
+ // Returns the Exit->Root edge. This edge is required for creating
+ // directed cycles in the algorithm for moving instrumentation off of
+ // the spanning tree
+ BallLarusEdge* getExitRootEdge();
+
+ // Returns an array of phony edges which mark those nodes
+ // with function calls
+ BLEdgeVector getCallPhonyEdges();
+
+ // Gets/sets the path counter array
+ GlobalVariable* getCounterArray();
+ void setCounterArray(GlobalVariable* c);
+
+ // Calculates the increments for the chords, thereby removing
+ // instrumentation from the spanning tree edges. Implementation is based
+ // on the algorithm in Figure 4 of [Ball94]
+ void calculateChordIncrements();
+
+ // Updates the state when an edge has been split
+ void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
+
+ // Calculates a spanning tree of the DAG ignoring cycles. Whichever
+ // edges are in the spanning tree will not be instrumented, but this
+ // implementation does not try to minimize the instrumentation overhead
+ // by trying to find hot edges.
+ void calculateSpanningTree();
+
+ // Pushes initialization further down in order to group the first
+ // increment and initialization.
+ void pushInitialization();
+
+ // Pushes the path counter increments up in order to group the last path
+ // number increment.
+ void pushCounters();
+
+ // Removes phony edges from the successor list of the source, and the
+ // predecessor list of the target.
+ void unlinkPhony();
+
+ // Generate dot graph for the function
+ void generateDotGraph();
+
+protected:
+ // BLInstrumentationDag creates BLInstrumentationNode objects in this
+ // method overriding the creation of BallLarusNode objects.
+ //
+ // Allows subclasses to determine which type of Node is created.
+ // Override this method to produce subclasses of BallLarusNode if
+ // necessary.
+ virtual BallLarusNode* createNode(BasicBlock* BB);
+
+  // BLInstrumentationDag creates BLInstrumentationEdges.
+ //
+ // Allows subclasses to determine which type of Edge is created.
+ // Override this method to produce subclasses of BallLarusEdge if
+ // necessary. Parameters source and target will have been created by
+ // createNode and can be cast to the subclass of BallLarusNode*
+ // returned by createNode.
+ virtual BallLarusEdge* createEdge(
+ BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
+
+private:
+ BLEdgeVector _treeEdges; // All edges in the spanning tree.
+ BLEdgeVector _chordEdges; // All edges not in the spanning tree.
+ GlobalVariable* _counterArray; // Array to store path counters
+
+ // Removes the edge from the appropriate predecessor and successor lists.
+ void unlinkEdge(BallLarusEdge* edge);
+
+ // Makes an edge part of the spanning tree.
+ void makeEdgeSpanning(BLInstrumentationEdge* edge);
+
+ // Pushes initialization and calls itself recursively.
+ void pushInitializationFromEdge(BLInstrumentationEdge* edge);
+
+ // Pushes path counter increments up recursively.
+ void pushCountersFromEdge(BLInstrumentationEdge* edge);
+
+  // Depth-first algorithm for determining the chord increments.
+ void calculateChordIncrementsDfs(
+ long weight, BallLarusNode* v, BallLarusEdge* e);
+
+ // Determines the relative direction of two edges.
+ int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
+};
+
+// ---------------------------------------------------------------------------
+// PathProfiler is a module pass which instruments functions for path profiling.
+// ---------------------------------------------------------------------------
+class PathProfiler : public ModulePass {
+private:
+ // Current context for multi threading support.
+ LLVMContext* Context;
+
+ // Which function are we currently instrumenting
+ unsigned currentFunctionNumber;
+
+ // The function prototype in the profiling runtime for incrementing a
+ // single path counter in a hash table.
+ Constant* llvmIncrementHashFunction;
+ Constant* llvmDecrementHashFunction;
+
+ // Instruments each function with path profiling. 'main' is instrumented
+ // with code to save the profile to disk.
+ bool runOnModule(Module &M);
+
+ // Analyzes the function for Ball-Larus path profiling, and inserts code.
+ void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
+
+ // Creates an increment constant representing incr.
+ ConstantInt* createIncrementConstant(long incr, int bitsize);
+
+ // Creates an increment constant representing the value in
+ // edge->getIncrement().
+ ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
+
+ // Finds the insertion point after pathNumber in block. PathNumber may
+ // be NULL.
+ BasicBlock::iterator getInsertionPoint(
+ BasicBlock* block, Value* pathNumber);
+
+ // Inserts source's pathNumber Value* into target. Target may or may not
+ // have multiple predecessors, and may or may not have its phiNode
+  // initialized.
+ void pushValueIntoNode(
+ BLInstrumentationNode* source, BLInstrumentationNode* target);
+
+ // Inserts source's pathNumber Value* into the appropriate slot of
+ // target's phiNode.
+ void pushValueIntoPHI(
+ BLInstrumentationNode* target, BLInstrumentationNode* source);
+
+  // The Value* in node, oldVal, is updated with a Value* corresponding to
+ // oldVal + addition.
+ void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
+ bool atBeginning);
+
+ // Creates a counter increment in the given node. The Value* in node is
+ // taken as the index into a hash table.
+ void insertCounterIncrement(
+ Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment = true);
+
+ // A PHINode is created in the node, and its values initialized to -1U.
+ void preparePHI(BLInstrumentationNode* node);
+
+ // Inserts instrumentation for the given edge
+ //
+  // Pre: The edge's source node has pathNumber set if the edge has a
+  //      non-zero path number increment.
+ //
+ // Post: Edge's target node has a pathNumber set to the path number Value
+ // corresponding to the value of the path register after edge's
+ // execution.
+ void insertInstrumentationStartingAt(
+ BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag);
+
+ // If this edge is a critical edge, then inserts a node at this edge.
+ // This edge becomes the first edge, and a new BallLarusEdge is created.
+ bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
+
+ // Inserts instrumentation according to the marked edges in dag. Phony
+ // edges must be unlinked from the DAG, but accessible from the
+ // backedges. Dag must have initializations, path number increments, and
+ // counter increments present.
+ //
+ // Counter storage is created here.
+ void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ PathProfiler() : ModulePass(ID) {
+ initializePathProfilerPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Path Profiler";
+ }
+};
+} // end anonymous namespace
+
+// Should we print the dot-graphs
+static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
+ cl::desc("Output the path profiling DAG for each function."));
+
+// Register the path profiler as a pass
+char PathProfiler::ID = 0;
+INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
+ "Insert instrumentation for Ball-Larus path profiling",
+ false, false)
+
+ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
+
+namespace llvm {
+ class PathProfilingFunctionTable {};
+
+ // Type for global array storing references to hashes or arrays
+ template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
+ xcompile> {
+ public:
+ static const StructType *get(LLVMContext& C) {
+ return( StructType::get(
+ C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // array size
+ TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
+ NULL));
+ }
+ };
+
+ typedef TypeBuilder<PathProfilingFunctionTable, true>
+ ftEntryTypeBuilder;
+
+ // BallLarusEdge << operator overloading
+ raw_ostream& operator<<(raw_ostream& os,
+ const BLInstrumentationEdge& edge) {
+ os << "[" << edge.getSource()->getName() << " -> "
+ << edge.getTarget()->getName() << "] init: "
+ << (edge.isInitialization() ? "yes" : "no")
+ << " incr:" << edge.getIncrement() << " cinc: "
+ << (edge.isCounterIncrement() ? "yes" : "no");
+ return(os);
+ }
+}
+
+// Creates a new BLInstrumentationNode from a BasicBlock.
+BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
+ BallLarusNode(BB),
+ _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
+
+// Constructor for BLInstrumentationEdge.
+BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
+ BLInstrumentationNode* target)
+ : BallLarusEdge(source, target, 0),
+ _increment(0), _isInSpanningTree(false), _isInitialization(false),
+ _isCounterIncrement(false), _hasInstrumentation(false) {}
+
+// Sets the target node of this edge. Required to split edges.
+void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
+ _target = node;
+}
+
+// Returns whether this edge is in the spanning tree.
+bool BLInstrumentationEdge::isInSpanningTree() const {
+ return(_isInSpanningTree);
+}
+
+// Sets whether this edge is in the spanning tree.
+void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
+ _isInSpanningTree = isInSpanningTree;
+}
+
+// Returns whether this edge will be instrumented with a path number
+// initialization.
+bool BLInstrumentationEdge::isInitialization() const {
+ return(_isInitialization);
+}
+
+// Sets whether this edge will be instrumented with a path number
+// initialization.
+void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
+ _isInitialization = isInitialization;
+}
+
+// Returns whether this edge will be instrumented with a path counter
+// increment. Notice this is incrementing the path counter
+// corresponding to the path number register. The path number
+// increment is determined by getIncrement().
+bool BLInstrumentationEdge::isCounterIncrement() const {
+ return(_isCounterIncrement);
+}
+
+// Sets whether this edge will be instrumented with a path counter
+// increment.
+void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
+ _isCounterIncrement = isCounterIncrement;
+}
+
+// Gets the path number increment that this edge will be instrumented
+// with. This is distinct from the path counter increment and the
+// weight.  The counter increment counts the number of executions of
+// some path, whereas the path number keeps track of which path number
+// the program is on.
+long BLInstrumentationEdge::getIncrement() const {
+ return(_increment);
+}
+
+// Set whether this edge will be instrumented with a path number
+// increment.
+void BLInstrumentationEdge::setIncrement(long increment) {
+ _increment = increment;
+}
+
+// True iff the edge has already been instrumented.
+bool BLInstrumentationEdge::hasInstrumentation() {
+ return(_hasInstrumentation);
+}
+
+// Set whether this edge has been instrumented.
+void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
+ _hasInstrumentation = hasInstrumentation;
+}
+
+// Returns the successor number of this edge in the source.
+unsigned BLInstrumentationEdge::getSuccessorNumber() {
+ BallLarusNode* sourceNode = getSource();
+ BallLarusNode* targetNode = getTarget();
+ BasicBlock* source = sourceNode->getBlock();
+ BasicBlock* target = targetNode->getBlock();
+
+ if(source == NULL || target == NULL)
+ return(0);
+
+ TerminatorInst* terminator = source->getTerminator();
+
+ unsigned i;
+ for(i=0; i < terminator->getNumSuccessors(); i++) {
+ if(terminator->getSuccessor(i) == target)
+ break;
+ }
+
+ return(i);
+}
+
+// BLInstrumentationDag constructor initializes a DAG for the given Function.
+BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
+ _counterArray(0) {
+}
+
+// Returns the Exit->Root edge. This edge is required for creating
+// directed cycles in the algorithm for moving instrumentation off of
+// the spanning tree
+BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
+ BLEdgeIterator erEdge = getExit()->succBegin();
+ return(*erEdge);
+}
+
+BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () {
+ BLEdgeVector callEdges;
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++ ) {
+ if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
+ callEdges.push_back(*edge);
+ }
+
+ return callEdges;
+}
+
+// Gets the path counter array
+GlobalVariable* BLInstrumentationDag::getCounterArray() {
+ return _counterArray;
+}
+
+void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
+ _counterArray = c;
+}
+
+// Calculates the increment for the chords, thereby removing
+// instrumentation from the spanning tree edges. Implementation is based on
+// the algorithm in Figure 4 of [Ball94]
+void BLInstrumentationDag::calculateChordIncrements() {
+ calculateChordIncrementsDfs(0, getRoot(), NULL);
+
+ BLInstrumentationEdge* chord;
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ chord = (BLInstrumentationEdge*) *chordEdge;
+ chord->setIncrement(chord->getIncrement() + chord->getWeight());
+ }
+}
+
+// Updates the state when an edge has been split
+void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
+ BasicBlock* newBlock) {
+ BallLarusNode* oldTarget = formerEdge->getTarget();
+ BallLarusNode* newNode = addNode(newBlock);
+ formerEdge->setTarget(newNode);
+ newNode->addPredEdge(formerEdge);
+
+ DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
+
+ oldTarget->removePredEdge(formerEdge);
+ BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
+
+ if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
+ formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
+ newEdge->setType(formerEdge->getType());
+ newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
+ newEdge->setPhonyExit(formerEdge->getPhonyExit());
+ formerEdge->setType(BallLarusEdge::NORMAL);
+ formerEdge->setPhonyRoot(NULL);
+ formerEdge->setPhonyExit(NULL);
+ }
+}
+
+// Calculates a spanning tree of the DAG ignoring cycles. Whichever
+// edges are in the spanning tree will not be instrumented, but this
+// implementation does not try to minimize the instrumentation overhead
+// by trying to find hot edges.
+void BLInstrumentationDag::calculateSpanningTree() {
+ std::stack<BallLarusNode*> dfsStack;
+
+ for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
+ nodeIt != end; nodeIt++) {
+ (*nodeIt)->setColor(BallLarusNode::WHITE);
+ }
+
+ dfsStack.push(getRoot());
+ while(dfsStack.size() > 0) {
+ BallLarusNode* node = dfsStack.top();
+ dfsStack.pop();
+
+ if(node->getColor() == BallLarusNode::WHITE)
+ continue;
+
+ BallLarusNode* nextNode;
+ bool forward = true;
+ BLEdgeIterator succEnd = node->succEnd();
+
+ node->setColor(BallLarusNode::WHITE);
+ // first iterate over successors then predecessors
+ for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
+ edge != predEnd; edge++) {
+ if(edge == succEnd) {
+ edge = node->predBegin();
+ forward = false;
+ }
+
+ // Ignore split edges
+ if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
+ if(nextNode->getColor() != BallLarusNode::WHITE) {
+ nextNode->setColor(BallLarusNode::WHITE);
+ makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
+ }
+ }
+ }
+
+ for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
+ // safe since createEdge is overridden
+ if(!instEdge->isInSpanningTree() && (*edge)->getType()
+ != BallLarusEdge::SPLITEDGE)
+ _chordEdges.push_back(instEdge);
+ }
+}
+
+// Pushes initialization further down in order to group the first
+// increment and initialization.
+void BLInstrumentationDag::pushInitialization() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsInitialization(true);
+ pushInitializationFromEdge(exitRootEdge);
+}
+
+// Pushes the path counter increments up in order to group the last path
+// number increment.
+void BLInstrumentationDag::pushCounters() {
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) getExitRootEdge();
+ exitRootEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(exitRootEdge);
+}
+
+// Removes phony edges from the successor list of the source, and the
+// predecessor list of the target.
+void BLInstrumentationDag::unlinkPhony() {
+ BallLarusEdge* edge;
+
+ for(BLEdgeIterator next = _edges.begin(),
+ end = _edges.end(); next != end; next++) {
+ edge = (*next);
+
+ if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
+ edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
+ unlinkEdge(edge);
+ }
+ }
+}
+
+// Generate a .dot graph to represent the DAG and pathNumbers
+void BLInstrumentationDag::generateDotGraph() {
+ std::string errorInfo;
+ std::string functionName = getFunction().getNameStr();
+ std::string filename = "pathdag." + functionName + ".dot";
+
+ DEBUG (dbgs() << "Writing '" << filename << "'...\n");
+ raw_fd_ostream dotFile(filename.c_str(), errorInfo);
+
+ if (!errorInfo.empty()) {
+ errs() << "Error opening '" << filename.c_str() <<"' for writing!";
+ errs() << "\n";
+ return;
+ }
+
+ dotFile << "digraph " << functionName << " {\n";
+
+ for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
+ edge != end; edge++) {
+ std::string sourceName = (*edge)->getSource()->getName();
+ std::string targetName = (*edge)->getTarget()->getName();
+
+ dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
+ << targetName.c_str() << "\" ";
+
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+
+ switch( (*edge)->getType() ) {
+ case BallLarusEdge::NORMAL:
+ dotFile << "[label=" << inc << "] [color=black];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE:
+ dotFile << "[color=cyan];\n";
+ break;
+
+ case BallLarusEdge::BACKEDGE_PHONY:
+ dotFile << "[label=" << inc
+ << "] [color=blue];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE:
+ dotFile << "[color=violet];\n";
+ break;
+
+ case BallLarusEdge::SPLITEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=red];\n";
+ break;
+
+ case BallLarusEdge::CALLEDGE_PHONY:
+ dotFile << "[label=" << inc << "] [color=green];\n";
+ break;
+ }
+ }
+
+ dotFile << "}\n";
+}
+
+// Allows subclasses to determine which type of Node is created.
+// Override this method to produce subclasses of BallLarusNode if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
+ return( new BLInstrumentationNode(BB) );
+}
+
+// Allows subclasses to determine which type of Edge is created.
+// Override this method to produce subclasses of BallLarusEdge if
+// necessary. The destructor of BallLarusDag will call free on each pointer
+// created.
+BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
+ BallLarusNode* target, unsigned edgeNumber) {
+ // One can cast from BallLarusNode to BLInstrumentationNode since createNode
+ // is overridden to produce BLInstrumentationNode.
+ return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
+ (BLInstrumentationNode*)target) );
+}
+
+// Gets the Value corresponding to the pathNumber register, constant,
+// or phinode. Used by the instrumentation code to remember path
+// number Values.
+Value* BLInstrumentationNode::getStartingPathNumber(){
+ return(_startingPathNumber);
+}
+
+// Sets the Value of the pathNumber. Used by the instrumentation code.
+void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
+ pathNumber->getNameStr() : "unused") << "\n");
+ _startingPathNumber = pathNumber;
+}
+
+Value* BLInstrumentationNode::getEndingPathNumber(){
+ return(_endingPathNumber);
+}
+
+void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
+ DEBUG(dbgs() << " EPN-" << getName() << " <-- "
+ << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ _endingPathNumber = pathNumber;
+}
+
+// Get the PHINode Instruction for this node. Used by instrumentation
+// code.
+PHINode* BLInstrumentationNode::getPathPHI() {
+ return(_pathPHI);
+}
+
+// Set the PHINode Instruction for this node. Used by instrumentation
+// code.
+void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
+ _pathPHI = pathPHI;
+}
+
+// Removes the edge from the appropriate predecessor and successor
+// lists.
+void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
+ if(edge == getExitRootEdge())
+ DEBUG(dbgs() << " Removing exit->root edge\n");
+
+ edge->getSource()->removeSuccEdge(edge);
+ edge->getTarget()->removePredEdge(edge);
+}
+
+// Makes an edge part of the spanning tree.
+void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
+ edge->setIsInSpanningTree(true);
+ _treeEdges.push_back(edge);
+}
+
+// Pushes initialization and calls itself recursively.
+void BLInstrumentationDag::pushInitializationFromEdge(
+ BLInstrumentationEdge* edge) {
+ BallLarusNode* target;
+
+ target = edge->getTarget();
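+ // Initialization cannot be pushed past a merge point or into the exit
+ // node; in those cases it stays on this edge.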
+ if( target->getNumberPredEdges() > 1 || target == getExit() ) {
+ return;
+ } else {
+ for(BLEdgeIterator next = target->succBegin(),
+ end = target->succEnd(); next != end; next++) {
+ BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
+
+ // Skip split edges
+ if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ intoEdge->setIncrement(intoEdge->getIncrement() +
+ edge->getIncrement());
+ intoEdge->setIsInitialization(true);
+ pushInitializationFromEdge(intoEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsInitialization(false);
+ }
+}
+
+// Pushes path counter increments up recursively.
+void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
+ BallLarusNode* source;
+
+ source = edge->getSource();
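+ // A counter increment cannot be hoisted above a branch point, the root,
+ // or an edge that already carries an initialization; it stays here.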
+ if(source->getNumberSuccEdges() > 1 || source == getRoot()
+ || edge->isInitialization()) {
+ return;
+ } else {
+ for(BLEdgeIterator previous = source->predBegin(),
+ end = source->predEnd(); previous != end; previous++) {
+ BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
+
+ // Skip split edges
+ if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
+ continue;
+
+ fromEdge->setIncrement(fromEdge->getIncrement() +
+ edge->getIncrement());
+ fromEdge->setIsCounterIncrement(true);
+ pushCountersFromEdge(fromEdge);
+ }
+
+ edge->setIncrement(0);
+ edge->setIsCounterIncrement(false);
+ }
+}
+
+// Depth first algorithm for determining the chord increments.
+void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
+ BallLarusNode* v, BallLarusEdge* e) {
+ BLInstrumentationEdge* f;
+
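+ // Walk the spanning tree away from v, skipping the tree edge e we arrived
+ // on; the direction helper flips the sign of the accumulated weight.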
+ for(BLEdgeIterator treeEdge = _treeEdges.begin(),
+ end = _treeEdges.end(); treeEdge != end; treeEdge++) {
+ f = (BLInstrumentationEdge*) *treeEdge;
+ if(e != f && v == f->getTarget()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getSource(), f);
+ }
+ if(e != f && v == f->getSource()) {
+ calculateChordIncrementsDfs(
+ calculateChordIncrementsDir(e,f)*(weight) +
+ f->getWeight(), f->getTarget(), f);
+ }
+ }
+
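+ // Credit the accumulated (signed) weight to every chord incident to v.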
+ for(BLEdgeIterator chordEdge = _chordEdges.begin(),
+ end = _chordEdges.end(); chordEdge != end; chordEdge++) {
+ f = (BLInstrumentationEdge*) *chordEdge;
+ if(v == f->getSource() || v == f->getTarget()) {
+ f->setIncrement(f->getIncrement() +
+ calculateChordIncrementsDir(e,f)*weight);
+ }
+ }
+}
+
+// Determines the relative direction of two edges.
+int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
+ BallLarusEdge* f) {
+ if( e == NULL)
+ return(1);
+ else if(e->getSource() == f->getTarget()
+ || e->getTarget() == f->getSource())
+ return(1);
+
+ return(-1);
+}
+
+// Creates an increment constant representing incr.
+ConstantInt* PathProfiler::createIncrementConstant(long incr,
+ int bitsize) {
+ return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
+}
+
+// Creates an increment constant representing the value in
+// edge->getIncrement().
+ConstantInt* PathProfiler::createIncrementConstant(
+ BLInstrumentationEdge* edge) {
+ return(createIncrementConstant(edge->getIncrement(), 32));
+}
+
+// Finds the insertion point after pathNumber in block. PathNumber may
+// be NULL.
+BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
+ pathNumber) {
+ if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
+ || (((Instruction*)(pathNumber))->getParent()) != block) {
+ return(block->getFirstNonPHI());
+ } else {
+ Instruction* pathNumberInst = (Instruction*) (pathNumber);
+ BasicBlock::iterator insertPoint;
+ BasicBlock::iterator end = block->end();
+
+ for(insertPoint = block->begin();
+ insertPoint != end; insertPoint++) {
+ Instruction* insertInst = &(*insertPoint);
+
+ if(insertInst == pathNumberInst)
+ return(++insertPoint);
+ }
+
+ return(insertPoint);
+ }
+}
+
+// A PHINode is created in the node, and its values initialized to -1U.
+void PathProfiler::preparePHI(BLInstrumentationNode* node) {
+ BasicBlock* block = node->getBlock();
+ BasicBlock::iterator insertPoint = block->getFirstNonPHI();
+ PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber",
+ insertPoint );
+ node->setPathPHI(phi);
+ node->setStartingPathNumber(phi);
+ node->setEndingPathNumber(phi);
+
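+ // Seed every predecessor with -1; pushValueIntoPHI later overwrites the
+ // placeholder for predecessors that pass in a real path number.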
+ for(pred_iterator predIt = pred_begin(node->getBlock()),
+ end = pred_end(node->getBlock()); predIt != end; predIt++) {
+ BasicBlock* pred = (*predIt);
+
+ if(pred != NULL)
+ phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
+ }
+}
+
+// Inserts source's pathNumber Value* into target. Target may or may not
+// have multiple predecessors, and may or may not have its phiNode
+// initialized.
+void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
+ BLInstrumentationNode* target) {
+ if(target->getBlock() == NULL)
+ return;
+
+
+ if(target->getNumberPredEdges() <= 1) {
+ assert(target->getStartingPathNumber() == NULL &&
+ "Target already has path number");
+ target->setStartingPathNumber(source->getEndingPathNumber());
+ target->setEndingPathNumber(source->getEndingPathNumber());
+ DEBUG(dbgs() << " Passing path number"
+ << (source->getEndingPathNumber() ? "" : " (null)")
+ << " value through.\n");
+ } else {
+ if(target->getPathPHI() == NULL) {
+ DEBUG(dbgs() << " Initializing PHI node for block '"
+ << target->getName() << "'\n");
+ preparePHI(target);
+ }
+ pushValueIntoPHI(target, source);
+ DEBUG(dbgs() << " Passing number value into PHI for block '"
+ << target->getName() << "'\n");
+ }
+}
+
+// Inserts source's pathNumber Value* into the appropriate slot of
+// target's phiNode.
+void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
+ BLInstrumentationNode* source) {
+ PHINode* phi = target->getPathPHI();
+ assert(phi != NULL && " Tried to push value into node with PHI, but node"
+ " actually had no PHI.");
+ phi->removeIncomingValue(source->getBlock(), false);
+ phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
+}
+
+// The Value* in node, oldVal, is updated with a Value* corresponding to
+// oldVal + addition.
+void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
+ Value* addition, bool atBeginning) {
+ BasicBlock* block = node->getBlock();
+ assert(node->getStartingPathNumber() != NULL);
+ assert(node->getEndingPathNumber() != NULL);
+
+ BasicBlock::iterator insertPoint;
+
+ if( atBeginning )
+ insertPoint = block->getFirstNonPHI();
+ else
+ insertPoint = block->getTerminator();
+
+ DEBUG(errs() << " Creating addition instruction.\n");
+ Value* newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ addition, "pathNumber", insertPoint);
+
+ node->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ node->setStartingPathNumber(newpn);
+}
+
+// Creates a counter increment in the given node. The Value* in node is
+// taken as the index into an array or hash table. The hash table access
+// is a call to the runtime.
+void PathProfiler::insertCounterIncrement(Value* incValue,
+ BasicBlock::iterator insertPoint,
+ BLInstrumentationDag* dag,
+ bool increment) {
+ // Counter increment for array
+ if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ // Get pointer to the array location
+ std::vector<Value*> gepIndices(2);
+ gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
+ gepIndices[1] = incValue;
+
+ GetElementPtrInst* pcPointer =
+ GetElementPtrInst::Create(dag->getCounterArray(),
+ gepIndices.begin(), gepIndices.end(),
+ "counterInc", insertPoint);
+
+ // Load from the array - call it oldPC
+ LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
+
+ // Test to see whether adding 1 will overflow the counter
+ ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
+ createIncrementConstant(0xffffffff, 32),
+ "isMax");
+
+ // Select increment for the path counter based on overflow
+ SelectInst* inc =
+ SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
+ createIncrementConstant(0,32),
+ "pathInc", insertPoint);
+
+ // newPc = oldPc + inc
+ BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
+ oldPc, inc, "newPC",
+ insertPoint);
+
+ // Store back in to the array
+ new StoreInst(newPc, pcPointer, insertPoint);
+ } else { // Counter increment for hash
+ std::vector<Value*> args(2);
+ args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
+ currentFunctionNumber);
+ args[1] = incValue;
+
+ CallInst::Create(
+ increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
+ args.begin(), args.end(), "", insertPoint);
+ }
+}
+
+// Inserts instrumentation for the given edge
+//
+// Pre: The edge's source node has pathNumber set if the edge is a non-zero
+// path number increment.
+//
+// Post: Edge's target node has a pathNumber set to the path number Value
+// corresponding to the value of the path register after edge's
+// execution.
+//
+// FIXME: This should be reworked so it's not recursive.
+void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ // Mark the edge as instrumented
+ edge->setHasInstrumentation(true);
+ DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
+
+ // create a new node for this edge's instrumentation
+ splitCritical(edge, dag);
+
+ BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
+ BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
+ BLInstrumentationNode* instrumentNode;
+ BLInstrumentationNode* nextSourceNode;
+
+ bool atBeginning = false;
+
+ // Source node has only 1 successor so any information can be simply
+// inserted into it without splitting
+ if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << sourceNode->getName() << " (at end)\n");
+ instrumentNode = sourceNode;
+ nextSourceNode = targetNode; // ... since we never made any new nodes
+ }
+
+ // The target node only has one predecessor, so we can safely insert edge
+ // instrumentation into it. If there was splitting, it must have been
+ // successful.
+ else if( targetNode->getNumberPredEdges() == 1 ) {
+ DEBUG(dbgs() << " Potential instructions to be placed in: "
+ << targetNode->getName() << " (at beginning)\n");
+ pushValueIntoNode(sourceNode, targetNode);
+ instrumentNode = targetNode;
+ nextSourceNode = NULL; // ... otherwise we'll just keep splitting
+ atBeginning = true;
+ }
+
+ // Somehow, splitting must have failed.
+ else {
+ errs() << "Instrumenting could not split a critical edge.\n";
+ DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
+ return;
+ }
+
+ // Insert instrumentation if this is a back or split edge
+ if( edge->getType() == BallLarusEdge::BACKEDGE ||
+ edge->getType() == BallLarusEdge::SPLITEDGE ) {
+ BLInstrumentationEdge* top =
+ (BLInstrumentationEdge*) edge->getPhonyRoot();
+ BLInstrumentationEdge* bottom =
+ (BLInstrumentationEdge*) edge->getPhonyExit();
+
+ assert( top->isInitialization() && " Top phony edge did not"
+ " contain a path number initialization.");
+ assert( bottom->isCounterIncrement() && " Bottom phony edge"
+ " did not contain a path counter increment.");
+
+ // split edge has yet to be initialized
+ if( !instrumentNode->getEndingPathNumber() ) {
+ instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
+ instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
+ }
+
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ // add information from the bottom edge, if it exists
+ if( bottom->getIncrement() ) {
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(bottom),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+ }
+
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(createIncrementConstant(top));
+
+ instrumentNode->setEndingPathNumber(createIncrementConstant(top));
+
+ // Check for path counter increments
+ if( top->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ instrumentNode->getBlock()->getTerminator(),dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Insert instrumentation if this is a normal edge
+ else {
+ BasicBlock::iterator insertPoint = atBeginning ?
+ instrumentNode->getBlock()->getFirstNonPHI() :
+ instrumentNode->getBlock()->getTerminator();
+
+ if( edge->isInitialization() ) { // initialize path number
+ instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
+ } else if( edge->getIncrement() ) {// increment path number
+ Value* newpn =
+ BinaryOperator::Create(Instruction::Add,
+ instrumentNode->getStartingPathNumber(),
+ createIncrementConstant(edge),
+ "pathNumber", insertPoint);
+ instrumentNode->setEndingPathNumber(newpn);
+
+ if( atBeginning )
+ instrumentNode->setStartingPathNumber(newpn);
+ }
+
+ // Check for path counter increments
+ if( edge->isCounterIncrement() ) {
+ insertCounterIncrement(instrumentNode->getEndingPathNumber(),
+ insertPoint, dag);
+ instrumentNode->setEndingPathNumber(0);
+ }
+ }
+
+ // Push it along
+ if (nextSourceNode && instrumentNode->getEndingPathNumber())
+ pushValueIntoNode(instrumentNode, nextSourceNode);
+
+ // Add all the successors
+ for( BLEdgeIterator next = targetNode->succBegin(),
+ end = targetNode->succEnd(); next != end; next++ ) {
+ // So long as it is un-instrumented, add it to the list
+ if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
+ insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
+ else
+ DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
+ << " already instrumented.\n");
+ }
+}
+
+// Inserts instrumentation according to the marked edges in dag. Phony edges
+// must be unlinked from the DAG, but accessible from the backedges. Dag
+// must have initializations, path number increments, and counter increments
+// present.
+//
+// Counter storage is created here.
+void PathProfiler::insertInstrumentation(
+ BLInstrumentationDag& dag, Module &M) {
+
+ BLInstrumentationEdge* exitRootEdge =
+ (BLInstrumentationEdge*) dag.getExitRootEdge();
+ insertInstrumentationStartingAt(exitRootEdge, &dag);
+
+ // Iterate through each call edge and apply the appropriate hash increment
+ // and decrement functions
+ BLEdgeVector callEdges = dag.getCallPhonyEdges();
+ for( BLEdgeIterator edge = callEdges.begin(),
+ end = callEdges.end(); edge != end; edge++ ) {
+ BLInstrumentationNode* node =
+ (BLInstrumentationNode*)(*edge)->getSource();
+ BasicBlock::iterator insertPoint = node->getBlock()->getFirstNonPHI();
+
+ // Find the first function call
+ while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
+ insertPoint++;
+
+ DEBUG(dbgs() << "\nInstrumenting method call block '"
+ << node->getBlock()->getNameStr() << "'\n");
+ DEBUG(dbgs() << " Path number initialized: "
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+
+ Value* newpn;
+ if( node->getStartingPathNumber() ) {
+ long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
+ if ( inc )
+ newpn = BinaryOperator::Create(Instruction::Add,
+ node->getStartingPathNumber(),
+ createIncrementConstant(inc,32),
+ "pathNumber", insertPoint);
+ else
+ newpn = node->getStartingPathNumber();
+ } else {
+ newpn = (Value*)createIncrementConstant(
+ ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
+ }
+
+ insertCounterIncrement(newpn, insertPoint, &dag);
+ insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
+ &dag, false);
+ }
+}
+
+// Instruments a single function; called from runOnModule for each function.
+void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
+ Function &F, Module &M) {
+ // Build DAG from CFG
+ BLInstrumentationDag dag = BLInstrumentationDag(F);
+ dag.init();
+
+ // give each path a unique integer value
+ dag.calculatePathNumbers();
+
+ // modify path increments to increase the efficiency
+ // of instrumentation
+ dag.calculateSpanningTree();
+ dag.calculateChordIncrements();
+ dag.pushInitialization();
+ dag.pushCounters();
+ dag.unlinkPhony();
+
+ // potentially generate .dot graph for the dag
+ if (DotPathDag)
+ dag.generateDotGraph ();
+
+ // Should we store the information in an array or hash
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
+ const Type* t = ArrayType::get(Type::getInt32Ty(*Context),
+ dag.getNumberOfPaths());
+
+ dag.setCounterArray(new GlobalVariable(M, t, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(t), ""));
+ }
+
+ insertInstrumentation(dag, M);
+
+ // Add to global function reference table
+ unsigned type;
+ const Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
+
+ if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
+ type = ProfilingArray;
+ else
+ type = ProfilingHash;
+
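+ // Each function table entry records {profiling type, number of paths,
+ // pointer to the counter array (null when hashing is used)}.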
+ std::vector<Constant*> entryArray(3);
+ entryArray[0] = createIncrementConstant(type,32);
+ entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
+ entryArray[2] = dag.getCounterArray() ?
+ ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
+ Constant::getNullValue(voidPtr);
+
+ const StructType* at = ftEntryTypeBuilder::get(*Context);
+ ConstantStruct* functionEntry =
+ (ConstantStruct*)ConstantStruct::get(at, entryArray);
+ ftInit.push_back(functionEntry);
+}
+
+// Output the bitcode if we want to observe instrumentation changes
+#define PRINT_MODULE dbgs() << \
+ "\n\n============= MODULE BEGIN ===============\n" << M << \
+ "\n============== MODULE END ================\n"
+
+bool PathProfiler::runOnModule(Module &M) {
+ Context = &M.getContext();
+
+ DEBUG(dbgs()
+ << "****************************************\n"
+ << "****************************************\n"
+ << "** **\n"
+ << "** PATH PROFILING INSTRUMENTATION **\n"
+ << "** **\n"
+ << "****************************************\n"
+ << "****************************************\n");
+
+ // No main, no instrumentation!
+ Function *Main = M.getFunction("main");
+
+ // Using Fortran? ... this kind of works
+ if (!Main)
+ Main = M.getFunction("MAIN__");
+
+ if (!Main) {
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
+ return false;
+ }
+
+ BasicBlock::iterator insertPoint = Main->getEntryBlock().getFirstNonPHI();
+
+ llvmIncrementHashFunction = M.getOrInsertFunction(
+ "llvm_increment_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ llvmDecrementHashFunction = M.getOrInsertFunction(
+ "llvm_decrement_path_count",
+ Type::getVoidTy(*Context), // return type
+ Type::getInt32Ty(*Context), // function number
+ Type::getInt32Ty(*Context), // path number
+ NULL );
+
+ std::vector<Constant*> ftInit;
+ unsigned functionNumber = 0;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+ if (F->isDeclaration())
+ continue;
+
+ DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ functionNumber++;
+
+ // set function number
+ currentFunctionNumber = functionNumber;
+ runOnFunction(ftInit, *F, M);
+ }
+
+ const Type *t = ftEntryTypeBuilder::get(*Context);
+ const ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
+ Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
+
+ DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
+
+ GlobalVariable* functionTable =
+ new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
+ ftInitConstant, "functionPathTable");
+ const Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
+ InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
+ PointerType::getUnqual(eltType));
+
+ DEBUG(PRINT_MODULE);
+
+ return true;
+}
+
+// If this edge is a critical edge, then a new node is inserted at this edge.
+// This edge becomes the first edge, and a new BallLarusEdge is created.
+// Returns true if the edge was split
+bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
+ BLInstrumentationDag* dag) {
+ unsigned succNum = edge->getSuccessorNumber();
+ BallLarusNode* sourceNode = edge->getSource();
+ BallLarusNode* targetNode = edge->getTarget();
+ BasicBlock* sourceBlock = sourceNode->getBlock();
+ BasicBlock* targetBlock = targetNode->getBlock();
+
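+ // Only a genuinely critical edge is split: the source must have multiple
+ // successors and the target multiple predecessors.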
+ if(sourceBlock == NULL || targetBlock == NULL
+ || sourceNode->getNumberSuccEdges() <= 1
+ || targetNode->getNumberPredEdges() == 1 ) {
+ return(false);
+ }
+
+ TerminatorInst* terminator = sourceBlock->getTerminator();
+
+ if( SplitCriticalEdge(terminator, succNum, this, false)) {
+ BasicBlock* newBlock = terminator->getSuccessor(succNum);
+ dag->splitUpdate(edge, newBlock);
+ return(true);
+ } else
+ return(false);
+}
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 1a30e9b..b57bbf6 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -22,12 +22,13 @@
#include "llvm/Module.h"
void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Array) {
+ GlobalValue *Array,
+ PointerType *arrayType) {
LLVMContext &Context = MainFn->getContext();
- const Type *ArgVTy =
+ const Type *ArgVTy =
PointerType::getUnqual(Type::getInt8PtrTy(Context));
- const PointerType *UIntPtr =
- Type::getInt32PtrTy(Context);
+ const PointerType *UIntPtr = arrayType ? arrayType :
+ Type::getInt32PtrTy(Context);
Module &M = *MainFn->getParent();
Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
Type::getInt32Ty(Context),
@@ -71,9 +72,9 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
case 2:
AI = MainFn->arg_begin(); ++AI;
if (AI->getType() != ArgVTy) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
+ Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
false);
- InitCall->setArgOperand(1,
+ InitCall->setArgOperand(1,
CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
} else {
InitCall->setArgOperand(1, AI);
@@ -93,7 +94,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
opcode = CastInst::getCastOpcode(AI, true,
Type::getInt32Ty(Context), true);
- InitCall->setArgOperand(0,
+ InitCall->setArgOperand(0,
CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
"argc.cast", InitCall));
} else {
@@ -106,9 +107,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray) {
+ GlobalValue *CounterArray, bool beginning) {
// Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = BB->getFirstNonPHI();
+ BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() :
+ BB->getTerminator();
while (isa<AllocaInst>(InsertPos))
++InsertPos;
@@ -118,7 +120,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
std::vector<Constant*> Indices(2);
Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
- Constant *ElementPtr =
+ Constant *ElementPtr =
ConstantExpr::getGetElementPtr(CounterArray, &Indices[0],
Indices.size());
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
index 94efffe..a76e357 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
@@ -21,11 +21,14 @@ namespace llvm {
class Function;
class GlobalValue;
class BasicBlock;
+ class PointerType;
void InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Arr = 0);
+ GlobalValue *Arr = 0,
+ PointerType *arrayType = 0);
void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray);
+ GlobalValue *CounterArray,
+ bool beginning = true);
}
#endif
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index ada086e..a5adb5e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,9 @@ STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(ID) {}
+ ADCE() : FunctionPass(ID) {
+ initializeADCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function& F);
@@ -45,7 +47,7 @@ namespace {
}
char ADCE::ID = 0;
-INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
diff --git a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index b144678..cee5502 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,9 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(ID) {}
+ BlockPlacement() : FunctionPass(ID) {
+ initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -74,8 +76,11 @@ namespace {
}
char BlockPlacement::ID = 0;
-INITIALIZE_PASS(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false);
+INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
+INITIALIZE_AG_DEPENDENCY(ProfileInfo)
+INITIALIZE_PASS_END(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false)
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index e07b761..9536939 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,6 +22,8 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -31,6 +33,7 @@
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
@@ -39,31 +42,59 @@
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+
static cl::opt<bool>
CriticalEdgeSplit("cgp-critical-edge-splitting",
cl::desc("Split critical edges during codegen prepare"),
- cl::init(true), cl::Hidden);
+ cl::init(false), cl::Hidden);
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ DominatorTree *DT;
ProfileInfo *PFI;
+
+ /// CurInstIterator - As we scan instructions optimizing them, this is the
+ /// next instruction to optimize. Xforms that can invalidate this should
+ /// update it.
+ BasicBlock::iterator CurInstIterator;
/// BackEdges - Keep a set of all the loop back edges.
///
SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
+
+ // Keeps track of non-local addresses that have been sunk into a block. This
+ // allows us to avoid inserting duplicate code for blocks with multiple
+ // load/stores of the same address.
+ DenseMap<Value*, Value*> SunkAddrs;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
@@ -76,10 +107,9 @@ namespace {
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs);
- bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs);
+ bool OptimizeInst(Instruction *I);
+ bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
+ bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
@@ -89,7 +119,7 @@ namespace {
char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false);
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -108,13 +138,16 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
- // Now find loop back edges.
- findLoopBackEdges(F);
+ // Now find loop back edges, but only if they are being used to decide which
+ // critical edges to split.
+ if (CriticalEdgeSplit)
+ findLoopBackEdges(F);
bool MadeChange = true;
while (MadeChange) {
@@ -123,6 +156,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= OptimizeBlock(*BB);
EverMadeChange |= MadeChange;
}
+
+ SunkAddrs.clear();
+
return EverMadeChange;
}
@@ -297,11 +333,19 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
+ if (DT) {
+ BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
+ BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+ DT->changeImmediateDominator(DestBB, NewIDom);
+ DT->eraseNode(BB);
+ }
if (PFI) {
PFI->replaceAllUses(BB, DestBB);
PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
+ ++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
@@ -480,6 +524,7 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ ++NumCastUses;
}
// If we removed all uses, nuke the cast.
@@ -537,6 +582,7 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ ++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
@@ -563,14 +609,45 @@ protected:
} // end anonymous namespace
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+ // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (OptimizeInlineAsmInst(CI))
+ return true;
+ }
+
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
- CI->replaceAllUsesWith(RetVal);
- CI->eraseFromParent();
+
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ WeakVH IterHandle(CurInstIterator);
+
+ ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurInstIterator) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
return true;
}
@@ -588,6 +665,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
CodeGenPrepareFortifiedLibCalls Simplifier;
return Simplifier.fold(CI, TD);
}
+
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -610,13 +688,69 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- const Type *AccessTy,
- DenseMap<Value*,Value*> &SunkAddrs) {
- // Figure out what addressing mode will be built up for this operation.
+ const Type *AccessTy) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = 0;
+ unsigned NumUses = 0;
SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
- AddrModeInsts, *TLI);
-
+ ExtAddrMode AddrMode;
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (Visited.count(V)) {
+ Consensus = 0;
+ break;
+ }
+
+ Visited.insert(V);
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+ worklist.push_back(P->getIncomingValue(i));
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode =
+ AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
+ NewAddrModeInsts, *TLI);
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
+ if (!Consensus || NewAddrMode == AddrMode) {
+ if (V->getNumUses() > NumUses) {
+ Consensus = V;
+ NumUses = V->getNumUses();
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = 0;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) return false;
+
// Check to see if any of the instructions supersumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
@@ -719,60 +853,39 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
- MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
- if (Addr->use_empty()) {
- RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ if (Repl->use_empty()) {
+ RecursivelyDeleteTriviallyDeadInstructions(Repl);
// This address is now available for reassignment, so erase the table entry;
// we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
}
+ ++NumMemoryInsts;
return true;
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
- DenseMap<Value*,Value*> &SunkAddrs) {
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
- InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
-
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- /// ConstraintOperands - Information about all of the constraints.
- std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.
- push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
- TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
+ TargetLowering::AsmOperandInfoVector
+ TargetConstraints = TLI->ParseConstraints(CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
- Value *OpVal = OpInfo.CallOperandVal;
- MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
- }
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
}
return MadeChange;
@@ -794,7 +907,9 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
if (!LI->hasOneUse() &&
- TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
+ TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+ !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType()))
return false;
// Check whether the target supports casts folded into loads.
@@ -812,13 +927,14 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// can fold it.
I->removeFromParent();
I->insertAfter(LI);
+ ++NumExtsMoved;
return true;
}
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
- // If both result of the {s|z}xt and its source are live out, rewrite all
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
@@ -883,13 +999,83 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
// Replace a use of the {s|z}ext source with a use of the result.
TheUse = InsertedTrunc;
-
+ ++NumExtUses;
MadeChange = true;
}
return MadeChange;
}
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than then one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ bool MadeChange = MoveExtToFormExtLoad(I);
+ return MadeChange | OptimizeExtUses(I);
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ return OptimizeCmpExpression(CI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (TLI)
+ return OptimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType());
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ /// The GEP operand must be a pointer, so must its result -> BitCast
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ OptimizeInst(NC);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return OptimizeCallInst(CI);
+
+ return false;
+}
+
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
@@ -908,74 +1094,11 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
}
}
- // Keep track of non-local addresses that have been sunk into this block.
- // This allows us to avoid inserting duplicate code for blocks with multiple
- // load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
-
- for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
- Instruction *I = BBI++;
+ SunkAddrs.clear();
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // If the source of the cast is a constant, then this should have
- // already been constant folded. The only reason NOT to constant fold
- // it is if something (e.g. LSR) was careful to place the constant
- // evaluation in a block other than then one that uses it (e.g. to hoist
- // the address of globals out of a loop). If this is the case, we don't
- // want to forward-subst the cast.
- if (isa<Constant>(CI->getOperand(0)))
- continue;
-
- bool Change = false;
- if (TLI) {
- Change = OptimizeNoopCopyExpression(CI, *TLI);
- MadeChange |= Change;
- }
-
- if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
- MadeChange |= MoveExtToFormExtLoad(I);
- MadeChange |= OptimizeExtUses(I);
- }
- } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
- MadeChange |= OptimizeCmpExpression(CI);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
- SunkAddrs);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType(),
- SunkAddrs);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- if (GEPI->hasAllZeroIndices()) {
- /// The GEP operand must be a pointer, so must its result -> BitCast
- Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
- GEPI->replaceAllUsesWith(NC);
- GEPI->eraseFromParent();
- MadeChange = true;
- BBI = NC;
- }
- } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // If we found an inline asm expession, and if the target knows how to
- // lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
- if (TLI->ExpandInlineAsm(CI)) {
- BBI = BB.begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- } else
- // Sink address computing for memory operands into the block.
- MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
- } else {
- // Other CallInst optimizations that don't need to muck with the
- // enclosing iterator here.
- MadeChange |= OptimizeCallInst(CI);
- }
- }
- }
+ CurInstIterator = BB.begin();
+ for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
+ MadeChange |= OptimizeInst(CurInstIterator++);
return MadeChange;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
index a0ea369..664c3f6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,9 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(ID) {}
+ ConstantPropagation() : FunctionPass(ID) {
+ initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -46,7 +48,7 @@ namespace {
char ConstantPropagation::ID = 0;
INITIALIZE_PASS(ConstantPropagation, "constprop",
- "Simple constant propagation", false, false);
+ "Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0d4e45d..be12973 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -30,18 +31,20 @@ STATISTIC(NumCmps, "Number of comparisons propagated");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
LazyValueInfo *LVI;
-
+
bool processSelect(SelectInst *SI);
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
-
+
public:
static char ID;
- CorrelatedValuePropagation(): FunctionPass(ID) { }
-
+ CorrelatedValuePropagation(): FunctionPass(ID) {
+ initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
+ }
+
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
}
@@ -49,8 +52,11 @@ namespace {
}
char CorrelatedValuePropagation::ID = 0;
-INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
- "Value Propagation", false, false);
+INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false)
// Public interface to the Value Propagation pass
Pass *llvm::createCorrelatedValuePropagationPass() {
@@ -60,46 +66,51 @@ Pass *llvm::createCorrelatedValuePropagationPass() {
bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
if (S->getType()->isVectorTy()) return false;
if (isa<Constant>(S->getOperand(0))) return false;
-
+
Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
if (!C) return false;
-
+
ConstantInt *CI = dyn_cast<ConstantInt>(C);
if (!CI) return false;
-
- S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+
+ Value *ReplaceWith = S->getOperand(1);
+ Value *Other = S->getOperand(2);
+ if (!CI->isOne()) std::swap(ReplaceWith, Other);
+ if (ReplaceWith == S) ReplaceWith = UndefValue::get(S->getType());
+
+ S->replaceAllUsesWith(ReplaceWith);
S->eraseFromParent();
++NumSelects;
-
+
return true;
}
bool CorrelatedValuePropagation::processPHI(PHINode *P) {
bool Changed = false;
-
+
BasicBlock *BB = P->getParent();
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
-
+
Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
P->getIncomingBlock(i),
BB);
if (!C) continue;
-
+
P->setIncomingValue(i, C);
Changed = true;
}
-
- if (Value *ConstVal = P->hasConstantValue()) {
- P->replaceAllUsesWith(ConstVal);
+
+ if (Value *V = SimplifyInstruction(P)) {
+ P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
}
-
+
++NumPhis;
-
+
return Changed;
}
@@ -109,12 +120,12 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
Pointer = L->getPointerOperand();
else
Pointer = cast<StoreInst>(I)->getPointerOperand();
-
+
if (isa<Constant>(Pointer)) return false;
-
+
Constant *C = LVI->getConstant(Pointer, I->getParent());
if (!C) return false;
-
+
++NumMemAccess;
I->replaceUsesOfWith(Pointer, C);
return true;
@@ -130,32 +141,32 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
if (isa<Instruction>(Op0) &&
cast<Instruction>(Op0)->getParent() == C->getParent())
return false;
-
+
Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
if (!Op1) return false;
-
+
pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
if (PI == PE) return false;
-
- LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Result == LazyValueInfo::Unknown) return false;
++PI;
while (PI != PE) {
- LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
C->getOperand(0), Op1, *PI, C->getParent());
if (Res != Result) return false;
++PI;
}
-
+
++NumCmps;
-
+
if (Result == LazyValueInfo::True)
C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
else
C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
-
+
C->eraseFromParent();
return true;
@@ -163,9 +174,9 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
-
+
bool FnChanged = false;
-
+
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
bool BBChanged = false;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
@@ -187,14 +198,9 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
break;
}
}
-
- // Propagating correlated values might leave cruft around.
- // Try to clean it up before we continue.
- if (BBChanged)
- SimplifyInstructionsInBlock(FI);
-
+
FnChanged |= BBChanged;
}
-
+
return FnChanged;
}
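
For readers unfamiliar with the pass, correlated value propagation asks LazyValueInfo what is known about a value on each incoming edge and folds comparisons that are decided the same way on every edge. A rough source-level picture of the redundancy processCmp removes (hypothetical example, not taken from this commit):

int f(int x) {
  if (x > 10) {
    // On the edge into this block LVI proves x > 10, so the comparison
    // below is true on every predecessor edge and folds to a constant.
    if (x > 5)
      return 1;
  }
  return 0;
}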
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index 87ea803..dbb68f3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,9 @@ namespace {
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {
+ initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -57,7 +59,7 @@ namespace {
char DeadInstElimination::ID = 0;
INITIALIZE_PASS(DeadInstElimination, "die",
- "Dead Instruction Elimination", false, false);
+ "Dead Instruction Elimination", false, false)
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +72,9 @@ namespace {
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(ID) {}
+ DCE() : FunctionPass(ID) {
+ initializeDCEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -81,7 +85,7 @@ namespace {
}
char DCE::ID = 0;
-INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c8fd9d9..867a06a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -19,17 +19,20 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -37,58 +40,107 @@ STATISTIC(NumFastOther , "Number of other instrs removed");
namespace {
struct DSE : public FunctionPass {
- TargetData *TD;
+ AliasAnalysis *AA;
+ MemoryDependenceAnalysis *MD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID) {}
+ DSE() : FunctionPass(ID), AA(0), MD(0) {
+ initializeDSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F) {
- bool Changed = false;
-
+ AA = &getAnalysis<AliasAnalysis>();
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DominatorTree &DT = getAnalysis<DominatorTree>();
+ bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
if (DT.isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
+
+ AA = 0; MD = 0;
return Changed;
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(const CallInst *F,
- MemDepResult Dep);
+ bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
- bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers);
- void DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *deadPointers = 0);
-
+ void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects);
- // getAnalysisUsage - We require post dominance frontiers (aka Control
- // Dependence Graph)
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
-
- unsigned getPointerSize(Value *V) const;
};
}
char DSE::ID = 0;
-INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
+INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// doesClobberMemory - Does this instruction clobber (write without reading)
-/// some memory?
-static bool doesClobberMemory(Instruction *I) {
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+/// If ValueSet is non-null, remove any deleted instructions from it as well.
+///
+static void DeleteDeadInstruction(Instruction *I,
+ MemoryDependenceAnalysis &MD,
+ SmallPtrSet<Value*, 16> *ValueSet = 0) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+ --NumFastOther;
+
+ // Before we touch this instruction, remove it from memdep!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // MemDep, which needs to know the operands and needs it to be in the
+ // function.
+ MD.removeInstruction(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ if (ValueSet) ValueSet->erase(DeadInst);
+ } while (!NowDeadInsts.empty());
+}
+
+
+/// hasMemoryWrite - Does this instruction write some memory? This only returns
+/// true for things that we can analyze with other helpers below.
+static bool hasMemoryWrite(Instruction *I) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -106,146 +158,296 @@ static bool doesClobberMemory(Instruction *I) {
return false;
}
-/// isElidable - If the value of this instruction and the memory it writes to is
-/// unused, may we delete this instrtction?
-static bool isElidable(Instruction *I) {
- assert(doesClobberMemory(I));
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return II->getIntrinsicID() != Intrinsic::lifetime_end;
+/// getLocForWrite - Return a Location stored to by the specified instruction.
+static AliasAnalysis::Location
+getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return AA.getLocation(SI);
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+ // memcpy/memmove/memset.
+ AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // memset/memcpy, which writes more than an i8.
+ if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
+ return AliasAnalysis::Location();
+ return Loc;
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+ if (II == 0) return AliasAnalysis::Location();
+
+ switch (II->getIntrinsicID()) {
+ default: return AliasAnalysis::Location(); // Unhandled intrinsic.
+ case Intrinsic::init_trampoline:
+ // If we don't have target data around, an unknown size in Location means
+ // that we should use the size of the pointee type. This isn't valid for
+ // init.trampoline, which writes more than an i8.
+ if (AA.getTargetData() == 0) return AliasAnalysis::Location();
+
+ // FIXME: We don't know the size of the trampoline, so we can't really
+ // handle it here.
+ return AliasAnalysis::Location(II->getArgOperand(0));
+ case Intrinsic::lifetime_end: {
+ uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ return AliasAnalysis::Location(II->getArgOperand(1), Len);
+ }
+ }
+}
+
+/// getLocForRead - Return the location read by the specified "hasMemoryWrite"
+/// instruction if any.
+static AliasAnalysis::Location
+getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
+ assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+
+ // The only instructions that both read and write are the mem transfer
+ // instructions (memcpy/memmove).
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+ return AA.getLocationForSource(MTI);
+ return AliasAnalysis::Location();
+}
+
+
+/// isRemovable - If the value of this instruction and the memory it writes to
+/// is unused, may we delete this instruction?
+static bool isRemovable(Instruction *I) {
+ // Don't remove volatile stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return !SI->isVolatile();
- return true;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+  default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
+
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
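
isRemovable is the gatekeeper that keeps DSE away from stores with side effects. For instance, a volatile store that looks dead must survive; a small illustrative C++ example (not from this commit):

void keepVolatile(volatile int *Reg) {
  *Reg = 1;   // appears dead (immediately overwritten), but it is a volatile
  *Reg = 2;   // access, so isRemovable() rejects it and both stores are kept.
}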
-/// getPointerOperand - Return the pointer that is being clobbered.
-static Value *getPointerOperand(Instruction *I) {
- assert(doesClobberMemory(I));
+/// getStoredPointerOperand - Return the pointer that is being written to.
+static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getArgOperand(0);
+ return MI->getDest();
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return II->getArgOperand(0);
- case Intrinsic::lifetime_end:
- return II->getArgOperand(1);
}
}
-/// getStoreSize - Return the length in bytes of the write by the clobbering
-/// instruction. If variable or unknown, returns -1.
-static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
- assert(doesClobberMemory(I));
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!TD) return -1u;
- return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+ const TargetData *TD = AA.getTargetData();
+ if (TD == 0)
+ return AliasAnalysis::UnknownSize;
+
+ if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ // Get size information for the alloca
+ if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
+ return AliasAnalysis::UnknownSize;
}
+
+ assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
+ const PointerType *PT = cast<PointerType>(V->getType());
+ return TD->getTypeAllocSize(PT->getElementType());
+}
- Value *Len;
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- Len = MI->getLength();
- } else {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: assert(false && "Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return -1u;
- case Intrinsic::lifetime_end:
- Len = II->getArgOperand(0);
- break;
+/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
+/// pointing to an object with a pointer size we can trust.
+static bool isObjectPointerWithTrustworthySize(const Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isArrayAllocation();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->mayBeOverridden();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+}
+
+/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// completely overwrites a store to the 'Earlier' location.
+static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA) {
+ const Value *P1 = Earlier.Ptr->stripPointerCasts();
+ const Value *P2 = Later.Ptr->stripPointerCasts();
+
+ // If the start pointers are the same, we just have to compare sizes to see if
+ // the later store was larger than the earlier store.
+ if (P1 == P2) {
+ // If we don't know the sizes of either access, then we can't do a
+ // comparison.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize) {
+ // If we have no TargetData information around, then the size of the store
+ // is inferrable from the pointee type. If they are the same type, then
+ // we know that the store is safe.
+ if (AA.getTargetData() == 0)
+ return Later.Ptr->getType() == Earlier.Ptr->getType();
+ return false;
}
+
+ // Make sure that the Later size is >= the Earlier size.
+ if (Later.Size < Earlier.Size)
+ return false;
+ return true;
}
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
- if (!LenCI->isAllOnesValue())
- return LenCI->getZExtValue();
- return -1u;
+
+ // Otherwise, we have to have size information, and the later store has to be
+ // larger than the earlier one.
+ if (Later.Size == AliasAnalysis::UnknownSize ||
+ Earlier.Size == AliasAnalysis::UnknownSize ||
+ Later.Size <= Earlier.Size || AA.getTargetData() == 0)
+ return false;
+
+ // Check to see if the later store is to the entire object (either a global,
+ // an alloca, or a byval argument). If so, then it clearly overwrites any
+ // other store to the same object.
+ const TargetData &TD = *AA.getTargetData();
+
+ const Value *UO1 = GetUnderlyingObject(P1, &TD),
+ *UO2 = GetUnderlyingObject(P2, &TD);
+
+ // If we can't resolve the same pointers to the same object, then we can't
+ // analyze them at all.
+ if (UO1 != UO2)
+ return false;
+
+ // If the "Later" store is to a recognizable object, get its size.
+ if (isObjectPointerWithTrustworthySize(UO2)) {
+ uint64_t ObjectSize =
+ TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
+ if (ObjectSize == Later.Size)
+ return true;
+ }
+
+ // Okay, we have stores to two completely different pointers. Try to
+ // decompose the pointer into a "base + constant_offset" form. If the base
+ // pointers are equal, then we can reason about the two stores.
+ int64_t Off1 = 0, Off2 = 0;
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD);
+
+ // If the base pointers still differ, we have two completely different stores.
+ if (BP1 != BP2)
+ return false;
+
+ // Otherwise, we might have a situation like:
+ // store i16 -> P + 1 Byte
+ // store i32 -> P
+ // In this case, we see if the later store completely overlaps all bytes
+ // stored by the previous store.
+ if (Off1 < Off2 || // Earlier starts before Later.
+ Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later.
+ return false;
+ // Otherwise, we have complete overlap.
+ return true;
}
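
The offset check at the end of isCompleteOverwrite is just interval containment once both stores are rebased onto the same pointer. A standalone sketch of that arithmetic (plain C++, no LLVM types; the function name is made up for illustration):

#include <cstdint>
#include <cassert>

// Later [Off2, Off2+Size2) completely overwrites Earlier [Off1, Off1+Size1)
// iff the earlier interval is contained in the later one.
static bool laterCompletelyOverwrites(int64_t Off1, uint64_t Size1,
                                      int64_t Off2, uint64_t Size2) {
  return Off1 >= Off2 && Off1 + (int64_t)Size1 <= Off2 + (int64_t)Size2;
}

int main() {
  // store i16 -> P + 1, then store i32 -> P : bytes [1,3) lie inside [0,4).
  assert(laterCompletelyOverwrites(1, 2, 0, 4));
  // store i32 -> P, then store i16 -> P + 1 : [0,4) is not inside [1,3).
  assert(!laterCompletelyOverwrites(0, 4, 1, 2));
  return 0;
}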
-/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
-/// greater than or equal to the store in I2. This returns false if we don't
-/// know.
+/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
+/// memory region into an identical pointer) then it doesn't actually make its
+/// input dead in the traditional sense. Consider this case:
+///
+/// memcpy(A <- B)
+/// memcpy(A <- A)
+///
+/// In this case, the second store to A does not make the first store to A dead.
+/// The usual situation isn't an explicit A<-A store like this (which can be
+/// trivially removed) but a case where two pointers may alias.
///
-static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
- const TargetData *TD) {
- const Type *I1Ty = getPointerOperand(I1)->getType();
- const Type *I2Ty = getPointerOperand(I2)->getType();
+/// This function detects when it is unsafe to remove a dependent instruction
+/// because the DSE inducing instruction may be a self-read.
+static bool isPossibleSelfRead(Instruction *Inst,
+ const AliasAnalysis::Location &InstStoreLoc,
+ Instruction *DepWrite, AliasAnalysis &AA) {
+ // Self reads can only happen for instructions that read memory. Get the
+ // location read.
+ AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
+ if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
- // Exactly the same type, must have exactly the same size.
- if (I1Ty == I2Ty) return true;
+ // If the read and written loc obviously don't alias, it isn't a read.
+ if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
- int I1Size = getStoreSize(I1, TD);
- int I2Size = getStoreSize(I2, TD);
+  // Okay, 'Inst' may copy over itself.  However, we can still remove the
+ // DepWrite instruction if we can prove that it reads from the same location
+ // as Inst. This handles useful cases like:
+ // memcpy(A <- B)
+ // memcpy(A <- B)
+ // Here we don't know if A/B may alias, but we do know that B/B are must
+ // aliases, so removing the first memcpy is safe (assuming it writes <= #
+  // bytes as the second one).
+ AliasAnalysis::Location DepReadLoc = getLocForRead(DepWrite, AA);
- return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
+ if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
+ return false;
+
+ // If DepWrite doesn't read memory or if we can't prove it is a must alias,
+ // then it can't be considered dead.
+ return true;
}
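
A concrete picture of the self-read hazard that isPossibleSelfRead guards against, written at the C level (hypothetical pointers, for illustration only):

#include <cstring>
#include <cstddef>

void selfReadHazard(char *A, char *B, char *MaybeA, std::size_t N) {
  std::memcpy(A, B, N);        // earlier write to A, reads B
  std::memcpy(A, MaybeA, N);   // later write to A, but it also *reads* MaybeA;
                               // if MaybeA may alias A, the first copy still
                               // feeds this one and must not be deleted.
}

void safeCase(char *A, char *B, std::size_t N) {
  std::memcpy(A, B, N);        // dead: the second copy reads the very same B
  std::memcpy(A, B, N);        // and overwrites everything the first wrote.
}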
-bool DSE::runOnBasicBlock(BasicBlock &BB) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<TargetData>();
+//===----------------------------------------------------------------------===//
+// DSE Pass
+//===----------------------------------------------------------------------===//
+
+bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool MadeChange = false;
// Do a top-down walk on the BB.
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
Instruction *Inst = BBI++;
- // If we find a store or a free, get its memory dependence.
- if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
- continue;
-
- MemDepResult InstDep = MD.getDependency(Inst);
-
- // Ignore non-local stores.
- // FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal()) continue;
-
- // Handle frees whose dependencies are non-trivial.
- if (const CallInst *F = isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
+ // Handle 'free' calls specially.
+ if (CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= HandleFree(F);
continue;
}
- // If not a definite must-alias dependency, ignore it.
- if (!InstDep.isDef())
+ // If we find something that writes memory, get its memory dependence.
+ if (!hasMemoryWrite(Inst))
continue;
-
- // If this is a store-store dependence, then the previous store is dead so
- // long as this store is at least as big as it.
- if (doesClobberMemory(InstDep.getInst())) {
- Instruction *DepStore = InstDep.getInst();
- if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
- isElidable(DepStore)) {
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepStore);
- ++NumFastStores;
- MadeChange = true;
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- continue;
- }
- }
+ MemDepResult InstDep = MD->getDependency(Inst);
- if (!isElidable(Inst))
+ // Ignore non-local store liveness.
+ // FIXME: cross-block DSE would be fun. :)
+ if (InstDep.isNonLocal() ||
+ // Ignore self dependence, which happens in the entry block of the
+ // function.
+ InstDep.getInst() == Inst)
continue;
-
+
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
+ SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
+ DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n "
+ << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n');
+
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(SI, *MD);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -258,24 +460,63 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
}
}
- // If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
- // Delete the store and now-dead instructions that feed it.
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(Inst);
+ // Figure out what location is being stored to.
+ AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
+
+ // If we didn't get a useful location, fail.
+ if (Loc.Ptr == 0)
+ continue;
+
+ while (!InstDep.isNonLocal()) {
+ // Get the memory clobbered by the instruction we depend on. MemDep will
+ // skip any instructions that 'Loc' clearly doesn't interact with. If we
+ // end up depending on a may- or must-aliased load, then we can't optimize
+      // away the store and we bail out.  However, if we depend on something
+ // that overwrites the memory location we *can* potentially optimize it.
+ //
+      // Find out what memory location the dependent instruction stores.
+ Instruction *DepWrite = InstDep.getInst();
+ AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
+ // If we didn't get a useful location, or if it isn't a size, bail out.
+ if (DepLoc.Ptr == 0)
+ break;
+
+ // If we find a write that is a) removable (i.e., non-volatile), b) is
+ // completely obliterated by the store to 'Loc', and c) which we know that
+ // 'Inst' doesn't load from, then we can remove it.
+ if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
++NumFastStores;
MadeChange = true;
- continue;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
}
+
+ // If this is a may-aliased store that is clobbering the store value, we
+ // can keep searching past it for another must-aliased pointer that stores
+ // to the same location. For example, in:
+ // store -> P
+ // store -> Q
+ // store -> P
+ // we can remove the first store to P even though we don't know if P and Q
+ // alias.
+ if (DepWrite == &BB.front()) break;
+
+ // Can't look past this instruction if it might read 'Loc'.
+ if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
+ break;
+
+ InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
}
}
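
The loop above keeps walking the dependence chain past may-aliased writes as long as they cannot read Loc. The pattern this unlocks, shown at the C level (illustrative only):

void storePQP(int *P, int *Q) {
  *P = 1;   // dead: overwritten by the last store to P below. The store to Q
            // in between may or may not alias P, but since it does not *read*
            // P, DSE can look past it and still kill this store.
  *Q = 2;
  *P = 3;
}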
@@ -287,26 +528,36 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
-/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
-/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
- MemDepResult Dep) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- Instruction *Dependency = Dep.getInst();
- if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
- return false;
+/// HandleFree - Handle frees of entire structures whose dependency is a store
+/// to a field of that structure.
+bool DSE::HandleFree(CallInst *F) {
+ MemDepResult Dep = MD->getDependency(F);
+ do {
+ if (Dep.isNonLocal()) return false;
+
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ return false;
- Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
- // Check for aliasing.
- if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
- AliasAnalysis::MustAlias)
- return false;
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ return false;
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getDependency(F);
+ } while (!Dep.isNonLocal());
- // DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency);
- ++NumFastStores;
return true;
}
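
HandleFree now iterates, so a whole chain of stores that only feed a freed object is deleted, not just the nearest one. At the C level the effect looks like this (illustrative):

#include <cstdlib>

void freeKillsStores(int *s) {
  s[0] = 0;   // dead: nothing can observe it before the free
  s[1] = 0;   // also dead; removed on the next iteration of the loop
  std::free(s);
}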
@@ -317,259 +568,163 @@ bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
bool MadeChange = false;
- // Pointers alloca'd in this function are dead in the end block
- SmallPtrSet<Value*, 64> deadPointers;
+ // Keep track of all of the stack objects that are dead at the end of the
+ // function.
+ SmallPtrSet<Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
if (AI->hasByValAttr())
- deadPointers.insert(AI);
+ DeadStackObjects.insert(AI);
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
- // If we find a store whose pointer is dead.
- if (doesClobberMemory(BBI)) {
- if (isElidable(BBI)) {
- // See through pointer-to-pointer bitcasts
- Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
-
- // Alloca'd pointers or byval arguments (which are functionally like
- // alloca's) are valid candidates for removal.
- if (deadPointers.count(pointerOperand)) {
- // DCE instructions only used to calculate that store.
- Instruction *Dead = BBI;
- ++BBI;
- DeleteDeadInstruction(Dead, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
- continue;
- }
- }
-
- // Because a memcpy or memmove is also a load, we can't skip it if we
- // didn't remove it.
- if (!isa<MemTransferInst>(BBI))
+ // If we find a store, check to see if it points into a dead stack value.
+ if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ // See through pointer-to-pointer bitcasts
+ Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI));
+
+ // Stores to stack values are valid candidates for removal.
+ if (DeadStackObjects.count(Pointer)) {
+ Instruction *Dead = BBI++;
+
+ DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
+ << *Dead << "\n Object: " << *Pointer << '\n');
+
+ // DCE instructions only used to calculate that store.
+ DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+ ++NumFastStores;
+ MadeChange = true;
continue;
+ }
}
- Value *killPointer = 0;
- uint64_t killPointerSize = ~0UL;
+ // Remove any dead non-memory-mutating instructions.
+ if (isInstructionTriviallyDead(BBI)) {
+ Instruction *Inst = BBI++;
+ DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+ ++NumFastOther;
+ MadeChange = true;
+ continue;
+ }
- // If we encounter a use of the pointer, it is no longer considered dead
- if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
- // However, if this load is unused and not volatile, we can go ahead and
- // remove it, and not have to worry about it making our pointer undead!
- if (L->use_empty() && !L->isVolatile()) {
- ++BBI;
- DeleteDeadInstruction(L, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- continue;
- }
-
- killPointer = L->getPointerOperand();
- } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
- killPointer = V->getOperand(0);
- } else if (isa<MemTransferInst>(BBI) &&
- isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
- killPointer = cast<MemTransferInst>(BBI)->getSource();
- killPointerSize = cast<ConstantInt>(
- cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
- } else if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
- deadPointers.erase(A);
-
- // Dead alloca's can be DCE'd when we reach them
- if (A->use_empty()) {
- ++BBI;
- DeleteDeadInstruction(A, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
- }
-
+ if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
+ DeadStackObjects.erase(A);
continue;
- } else if (CallSite CS = cast<Value>(BBI)) {
- // If this call does not access memory, it can't
- // be undeadifying any of our pointers.
- if (AA.doesNotAccessMemory(CS))
+ }
+
+ if (CallSite CS = cast<Value>(BBI)) {
+ // If this call does not access memory, it can't be loading any of our
+ // pointers.
+ if (AA->doesNotAccessMemory(CS))
continue;
- unsigned modRef = 0;
- unsigned other = 0;
+ unsigned NumModRef = 0, NumOther = 0;
- // Remove any pointers made undead by the call from the dead set
- std::vector<Value*> dead;
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // HACK: if we detect that our AA is imprecise, it's not
- // worth it to scan the rest of the deadPointers set. Just
- // assume that the AA will return ModRef for everything, and
- // go ahead and bail.
- if (modRef >= 16 && other == 0) {
- deadPointers.clear();
+ // If the call might load from any of our allocas, then any store above
+ // the call is live.
+ SmallVector<Value*, 8> LiveAllocas;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // If we detect that our AA is imprecise, it's not worth it to scan the
+ // rest of the DeadPointers set. Just assume that the AA will return
+ // ModRef for everything, and go ahead and bail out.
+ if (NumModRef >= 16 && NumOther == 0)
return MadeChange;
- }
-
- // See if the call site touches it
- AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I,
- getPointerSize(*I));
+
+ // See if the call site touches it.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
if (A == AliasAnalysis::ModRef)
- ++modRef;
+ ++NumModRef;
else
- ++other;
+ ++NumOther;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
- dead.push_back(*I);
+ LiveAllocas.push_back(*I);
}
-
- for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
- I != E; ++I)
- deadPointers.erase(*I);
- continue;
- } else if (isInstructionTriviallyDead(BBI)) {
- // For any non-memory-affecting non-terminators, DCE them as we reach them
- Instruction *Inst = BBI;
- ++BBI;
- DeleteDeadInstruction(Inst, &deadPointers);
- ++NumFastOther;
- MadeChange = true;
+ for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+ E = LiveAllocas.end(); I != E; ++I)
+ DeadStackObjects.erase(*I);
+
+ // If all of the allocas were clobbered by the call then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ return MadeChange;
+
continue;
}
- if (!killPointer)
+ AliasAnalysis::Location LoadedLoc;
+
+ // If we encounter a use of the pointer, it is no longer considered dead
+ if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
+ LoadedLoc = AA->getLocation(L);
+ } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
+ LoadedLoc = AA->getLocation(V);
+ } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
+ LoadedLoc = AA->getLocationForSource(MTI);
+ } else {
+ // Not a loading instruction.
continue;
+ }
- killPointer = killPointer->getUnderlyingObject();
+ // Remove any allocas from the DeadPointer set that are loaded, as this
+ // makes any stores above the access live.
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
- // Deal with undead pointers
- MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
- deadPointers);
+ // If all of the allocas were clobbered by the access then we're not going
+ // to find anything else to process.
+ if (DeadStackObjects.empty())
+ break;
}
return MadeChange;
}
-/// RemoveUndeadPointers - check for uses of a pointer that make it
-/// undead when scanning for dead stores to alloca's.
-bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
- BasicBlock::iterator &BBI,
- SmallPtrSet<Value*, 64> &deadPointers) {
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-
- // If the kill pointer can be easily reduced to an alloca,
- // don't bother doing extraneous AA queries.
- if (deadPointers.count(killPointer)) {
- deadPointers.erase(killPointer);
- return false;
- }
-
- // A global can't be in the dead pointer set.
- if (isa<GlobalValue>(killPointer))
- return false;
-
- bool MadeChange = false;
+/// RemoveAccessedObjects - Check to see if the specified location may alias any
+/// of the stack objects in the DeadStackObjects set. If so, they become live
+/// because the location is being loaded.
+void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
+ SmallPtrSet<Value*, 16> &DeadStackObjects) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+
+ // A constant can't be in the dead pointer set.
+ if (isa<Constant>(UnderlyingPointer))
+ return;
- SmallVector<Value*, 16> undead;
+ // If the kill pointer can be easily reduced to an alloca, don't bother doing
+ // extraneous AA queries.
+ if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
+ DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer));
+ return;
+ }
- for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
- E = deadPointers.end(); I != E; ++I) {
- // See if this pointer could alias it
- AliasAnalysis::AliasResult A = AA.alias(*I, getPointerSize(*I),
- killPointer, killPointerSize);
-
- // If it must-alias and a store, we can delete it
- if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
- StoreInst *S = cast<StoreInst>(BBI);
-
- // Remove it!
- ++BBI;
- DeleteDeadInstruction(S, &deadPointers);
- ++NumFastStores;
- MadeChange = true;
-
- continue;
-
- // Otherwise, it is undead
- } else if (A != AliasAnalysis::NoAlias)
- undead.push_back(*I);
+ SmallVector<Value*, 16> NowLive;
+ for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
+ E = DeadStackObjects.end(); I != E; ++I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
+ if (!AA->isNoAlias(StackLoc, LoadedLoc))
+ NowLive.push_back(*I);
}
- for (SmallVector<Value*, 16>::iterator I = undead.begin(), E = undead.end();
+ for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
I != E; ++I)
- deadPointers.erase(*I);
-
- return MadeChange;
+ DeadStackObjects.erase(*I);
}
-/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
-/// and zero out all the operands of this instruction. If any of them become
-/// dead, delete them and the computation tree that feeds them.
-///
-/// If ValueSet is non-null, remove any deleted instructions from it as well.
-///
-void DSE::DeleteDeadInstruction(Instruction *I,
- SmallPtrSet<Value*, 64> *ValueSet) {
- SmallVector<Instruction*, 32> NowDeadInsts;
-
- NowDeadInsts.push_back(I);
- --NumFastOther;
-
- // Before we touch this instruction, remove it from memdep!
- MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
- do {
- Instruction *DeadInst = NowDeadInsts.pop_back_val();
-
- ++NumFastOther;
-
- // This instruction is dead, zap it, in stages. Start by removing it from
- // MemDep, which needs to know the operands and needs it to be in the
- // function.
- MDA.removeInstruction(DeadInst);
-
- for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
- Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
-
- // If this operand just became dead, add it to the NowDeadInsts list.
- if (!Op->use_empty()) continue;
-
- if (Instruction *OpI = dyn_cast<Instruction>(Op))
- if (isInstructionTriviallyDead(OpI))
- NowDeadInsts.push_back(OpI);
- }
-
- DeadInst->eraseFromParent();
-
- if (ValueSet) ValueSet->erase(DeadInst);
- } while (!NowDeadInsts.empty());
-}
-
-unsigned DSE::getPointerSize(Value *V) const {
- if (TD) {
- if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
- // Get size information for the alloca
- if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
- return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- } else {
- assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- const PointerType *PT = cast<PointerType>(V->getType());
- return TD->getTypeAllocSize(PT->getElementType());
- }
- }
- return ~0U;
-}
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 0000000..3d3f17b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,470 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-cse"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE, "Number of instructions CSE'd");
+STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
+STATISTIC(NumCSECall, "Number of call instructions CSE'd");
+STATISTIC(NumDSE, "Number of trivial dead stores removed");
+
+static unsigned getHash(const void *V) {
+ return DenseMapInfo<const void*>::getHashValue(V);
+}
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// SimpleValue - Instances of this struct represent available values in the
+ /// scoped hash table.
+ struct SimpleValue {
+ Instruction *Inst;
+
+ SimpleValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // This can only handle non-void readnone functions.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+ return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+ isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+ isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+ }
+ };
+}
+
+namespace llvm {
+// SimpleValue is POD.
+template<> struct isPodLike<SimpleValue> {
+ static const bool value = true;
+};
+
+template<> struct DenseMapInfo<SimpleValue> {
+ static inline SimpleValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline SimpleValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(SimpleValue Val);
+ static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+}
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+ Instruction *Inst = Val.Inst;
+
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ Res ^= getHash(Inst->getOperand(i)) << i;
+
+ if (CastInst *CI = dyn_cast<CastInst>(Inst))
+ Res ^= getHash(CI->getType());
+ else if (CmpInst *CI = dyn_cast<CmpInst>(Inst))
+ Res ^= CI->getPredicate();
+ else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) {
+ for (ExtractValueInst::idx_iterator I = EVI->idx_begin(),
+ E = EVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) {
+ for (InsertValueInst::idx_iterator I = IVI->idx_begin(),
+ E = IVI->idx_end(); I != E; ++I)
+ Res ^= *I;
+ } else {
+ // nothing extra to hash in.
+ assert((isa<CallInst>(Inst) ||
+ isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
+ isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+ isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst)) &&
+ "Invalid/unknown instruction");
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+
+ if (LHSI->getOpcode() != RHSI->getOpcode()) return false;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CallValue - Instances of this struct represent available call values in
+ /// the scoped hash table.
+ struct CallValue {
+ Instruction *Inst;
+
+ CallValue(Instruction *I) : Inst(I) {
+ assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+ }
+
+ bool isSentinel() const {
+ return Inst == DenseMapInfo<Instruction*>::getEmptyKey() ||
+ Inst == DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+
+ static bool canHandle(Instruction *Inst) {
+ // Don't value number anything that returns void.
+ if (Inst->getType()->isVoidTy())
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (CI == 0 || !CI->onlyReadsMemory())
+ return false;
+ return true;
+ }
+ };
+}
+
+namespace llvm {
+ // CallValue is POD.
+ template<> struct isPodLike<CallValue> {
+ static const bool value = true;
+ };
+
+ template<> struct DenseMapInfo<CallValue> {
+ static inline CallValue getEmptyKey() {
+ return DenseMapInfo<Instruction*>::getEmptyKey();
+ }
+ static inline CallValue getTombstoneKey() {
+ return DenseMapInfo<Instruction*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(CallValue Val);
+ static bool isEqual(CallValue LHS, CallValue RHS);
+ };
+}
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+ Instruction *Inst = Val.Inst;
+ // Hash in all of the operands as pointers.
+ unsigned Res = 0;
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) {
+ assert(!Inst->getOperand(i)->getType()->isMetadataTy() &&
+ "Cannot value number calls with metadata operands");
+ Res ^= getHash(Inst->getOperand(i)) << i;
+ }
+
+ // Mix in the opcode.
+ return (Res << 1) ^ Inst->getOpcode();
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+ Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+ if (LHS.isSentinel() || RHS.isSentinel())
+ return LHSI == RHSI;
+ return LHSI->isIdenticalTo(RHSI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE pass.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// EarlyCSE - This pass does a simple depth-first walk over the dominator
+/// tree, eliminating trivially redundant instructions and using instsimplify
+/// to canonicalize things as it goes. It is intended to be fast and catch
+/// obvious cases so that instcombine and other passes are more effective. It
+/// is expected that a later pass of GVN will catch the interesting/hard
+/// cases.
+class EarlyCSE : public FunctionPass {
+public:
+ const TargetData *TD;
+ DominatorTree *DT;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
+ typedef ScopedHashTable<SimpleValue, Value*, DenseMapInfo<SimpleValue>,
+ AllocatorTy> ScopedHTType;
+
+ /// AvailableValues - This scoped hash table contains the current values of
+ /// all of our simple scalar expressions. As we walk down the domtree, we
+ /// look to see if instructions are in this: if so, we replace them with what
+ /// we find, otherwise we insert them so that dominated values can succeed in
+ /// their lookup.
+ ScopedHTType *AvailableValues;
+
+ /// AvailableLoads - This scoped hash table contains the current values
+ /// of loads. This allows us to get efficient access to dominating loads when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is
+ /// incremented after every possibly writing memory operation, which ensures
+ /// that we only CSE loads with other loads that have no intervening store.
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value*, std::pair<Value*, unsigned> > > LoadMapAllocator;
+ typedef ScopedHashTable<Value*, std::pair<Value*, unsigned>,
+ DenseMapInfo<Value*>, LoadMapAllocator> LoadHTType;
+ LoadHTType *AvailableLoads;
+
+ /// AvailableCalls - This scoped hash table contains the current values
+ /// of read-only call values. It uses the same generation count as loads.
+ typedef ScopedHashTable<CallValue, std::pair<Value*, unsigned> > CallHTType;
+ CallHTType *AvailableCalls;
+
+ /// CurrentGeneration - This is the current generation of the memory value.
+ unsigned CurrentGeneration;
+
+ static char ID;
+ explicit EarlyCSE() : FunctionPass(ID) {
+ initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+
+private:
+
+ bool processNode(DomTreeNode *Node);
+
+  // This transformation requires dominator tree info.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char EarlyCSE::ID = 0;
+
+// createEarlyCSEPass - The public interface to this file.
+FunctionPass *llvm::createEarlyCSEPass() {
+ return new EarlyCSE();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+ // Define a scope in the scoped hash table. When we are done processing this
+ // domtree node and recurse back up to our parent domtree node, this will pop
+ // off all the values we install.
+ ScopedHTType::ScopeTy Scope(*AvailableValues);
+
+ // Define a scope for the load values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ LoadHTType::ScopeTy LoadScope(*AvailableLoads);
+
+ // Define a scope for the call values so that anything we add will get
+ // popped when we recurse back up to our parent domtree node.
+ CallHTType::ScopeTy CallScope(*AvailableCalls);
+
+ BasicBlock *BB = Node->getBlock();
+
+ // If this block has a single predecessor, then the predecessor is the parent
+ // of the domtree node and all of the live out memory values are still current
+ // in this block. If this block has multiple predecessors, then they could
+ // have invalidated the live-out memory values of our parent value. For now,
+ // just be conservative and invalidate memory if this block has multiple
+ // predecessors.
+ if (BB->getSinglePredecessor() == 0)
+ ++CurrentGeneration;
+
+ /// LastStore - Keep track of the last non-volatile store that we saw... for
+  /// as long as there is no instruction that reads memory.  If we see a store
+ /// to the same location, we delete the dead store. This zaps trivial dead
+ /// stores which can occur in bitfield code among other things.
+ StoreInst *LastStore = 0;
+
+ bool Changed = false;
+
+ // See if any instructions in the block can be eliminated. If so, do it. If
+ // not, add them to AvailableValues.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+
+ // Dead instructions should just be removed.
+ if (isInstructionTriviallyDead(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+ // its simpler value.
+ if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumSimplify;
+ continue;
+ }
+
+ // If this is a simple instruction that we can value number, process it.
+ if (SimpleValue::canHandle(Inst)) {
+ // See if the instruction has an available value. If so, use it.
+ if (Value *V = AvailableValues->lookup(Inst)) {
+ DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V << '\n');
+ Inst->replaceAllUsesWith(V);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSE;
+ continue;
+ }
+
+ // Otherwise, just remember that this value is available.
+ AvailableValues->insert(Inst, Inst);
+ continue;
+ }
+
+ // If this is a non-volatile load, process it.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ // Ignore volatile loads.
+ if (LI->isVolatile()) {
+ LastStore = 0;
+ continue;
+ }
+
+ // If we have an available version of this load, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal =
+ AvailableLoads->lookup(Inst->getOperand(0));
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSELoad;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableLoads->insert(Inst->getOperand(0),
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ LastStore = 0;
+ continue;
+ }
+
+ // If this instruction may read from memory, forget LastStore.
+ if (Inst->mayReadFromMemory())
+ LastStore = 0;
+
+ // If this is a read-only call, process it.
+ if (CallValue::canHandle(Inst)) {
+ // If we have an available version of this call, and if it is the right
+ // generation, replace this instruction.
+ std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
+ if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
+ << *InVal.first << '\n');
+ if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumCSECall;
+ continue;
+ }
+
+ // Otherwise, remember that we have this instruction.
+ AvailableCalls->insert(Inst,
+ std::pair<Value*, unsigned>(Inst, CurrentGeneration));
+ continue;
+ }
+
+ // Okay, this isn't something we can CSE at all. Check to see if it is
+ // something that could modify memory. If so, our available memory values
+ // cannot be used so bump the generation count.
+ if (Inst->mayWriteToMemory()) {
+ ++CurrentGeneration;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // We do a trivial form of DSE if there are two stores to the same
+ // location with no intervening loads. Delete the earlier store.
+ if (LastStore &&
+ LastStore->getPointerOperand() == SI->getPointerOperand()) {
+ DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: "
+ << *Inst << '\n');
+ LastStore->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ LastStore = 0;
+ continue;
+ }
+
+ // Okay, we just invalidated anything we knew about loaded values. Try
+ // to salvage *something* by remembering that the stored value is a live
+ // version of the pointer. It is safe to forward from volatile stores
+ // to non-volatile loads, so we don't have to check for volatility of
+ // the store.
+ AvailableLoads->insert(SI->getPointerOperand(),
+ std::pair<Value*, unsigned>(SI->getValueOperand(), CurrentGeneration));
+
+ // Remember that this was the last store we saw for DSE.
+ if (!SI->isVolatile())
+ LastStore = SI;
+ }
+ }
+ }
+
+ unsigned LiveOutGeneration = CurrentGeneration;
+ for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
+ Changed |= processNode(*I);
+ // Pop any generation changes off the stack from the recursive walk.
+ CurrentGeneration = LiveOutGeneration;
+ }
+ return Changed;
+}
+
+
+bool EarlyCSE::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
+
+ // Tables that the pass uses when walking the domtree.
+ ScopedHTType AVTable;
+ AvailableValues = &AVTable;
+ LoadHTType LoadTable;
+ AvailableLoads = &LoadTable;
+ CallHTType CallTable;
+ AvailableCalls = &CallTable;
+
+ CurrentGeneration = 0;
+ return processNode(DT->getRootNode());
+}
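
The EarlyCSE hunk above hinges on a generation counter: available loads are stamped with the generation in which they were recorded, and any instruction that may write to memory simply bumps the counter, which invalidates every older entry at once without touching the table. A minimal standalone sketch of that idea, using plain STL containers rather than LLVM's ScopedHashTable (all names below are illustrative, not taken from the patch):

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

struct AvailableLoadTable {
  // Pointer name -> (cached loaded value, generation it was recorded in).
  std::unordered_map<std::string, std::pair<int, uint64_t>> Table;
  uint64_t CurrentGeneration = 0;

  void noteLoad(const std::string &Ptr, int Value) {
    Table[Ptr] = {Value, CurrentGeneration};
  }

  // A cached load is only reusable if nothing that may write to memory has
  // been seen since it was recorded, i.e. the generations still match.
  bool lookup(const std::string &Ptr, int &ValueOut) const {
    auto It = Table.find(Ptr);
    if (It == Table.end() || It->second.second != CurrentGeneration)
      return false;
    ValueOut = It->second.first;
    return true;
  }

  // Any possible store invalidates every earlier entry in one step.
  void noteMayWrite() { ++CurrentGeneration; }
};

int main() {
  AvailableLoadTable T;
  int V = 0;
  T.noteLoad("p", 42);
  std::cout << "before store: " << (T.lookup("p", V) ? "hit" : "miss") << "\n";
  T.noteMayWrite();
  std::cout << "after store:  " << (T.lookup("p", V) ? "hit" : "miss") << "\n";
  return 0;
}
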
diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
index 53dd06d..4c3d188 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,15 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {
+ initializeGEPSplitterPass(*PassRegistry::getPassRegistry());
+ }
};
}
char GEPSplitter::ID = 0;
INITIALIZE_PASS(GEPSplitter, "split-geps",
- "split complex GEPs into simple GEPs", false, false);
+ "split complex GEPs into simple GEPs", false, false)
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index c62ce1f..a0123f5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -17,39 +17,30 @@
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
using namespace llvm;
STATISTIC(NumGVNInstr, "Number of instructions deleted");
@@ -61,7 +52,6 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -72,76 +62,23 @@ static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
/// two values.
namespace {
struct Expression {
- enum ExpressionOpcode {
- ADD = Instruction::Add,
- FADD = Instruction::FAdd,
- SUB = Instruction::Sub,
- FSUB = Instruction::FSub,
- MUL = Instruction::Mul,
- FMUL = Instruction::FMul,
- UDIV = Instruction::UDiv,
- SDIV = Instruction::SDiv,
- FDIV = Instruction::FDiv,
- UREM = Instruction::URem,
- SREM = Instruction::SRem,
- FREM = Instruction::FRem,
- SHL = Instruction::Shl,
- LSHR = Instruction::LShr,
- ASHR = Instruction::AShr,
- AND = Instruction::And,
- OR = Instruction::Or,
- XOR = Instruction::Xor,
- TRUNC = Instruction::Trunc,
- ZEXT = Instruction::ZExt,
- SEXT = Instruction::SExt,
- FPTOUI = Instruction::FPToUI,
- FPTOSI = Instruction::FPToSI,
- UITOFP = Instruction::UIToFP,
- SITOFP = Instruction::SIToFP,
- FPTRUNC = Instruction::FPTrunc,
- FPEXT = Instruction::FPExt,
- PTRTOINT = Instruction::PtrToInt,
- INTTOPTR = Instruction::IntToPtr,
- BITCAST = Instruction::BitCast,
- ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
- ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
- FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
- FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
- FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
- SHUFFLE, SELECT, GEP, CALL, CONSTANT,
- INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
-
- ExpressionOpcode opcode;
+ uint32_t opcode;
const Type* type;
SmallVector<uint32_t, 4> varargs;
- Value *function;
Expression() { }
- Expression(ExpressionOpcode o) : opcode(o) { }
+ Expression(uint32_t o) : opcode(o) { }
bool operator==(const Expression &other) const {
if (opcode != other.opcode)
return false;
- else if (opcode == EMPTY || opcode == TOMBSTONE)
+ else if (opcode == ~0U || opcode == ~1U)
return true;
else if (type != other.type)
return false;
- else if (function != other.function)
+ else if (varargs != other.varargs)
return false;
- else {
- if (varargs.size() != other.varargs.size())
- return false;
-
- for (size_t i = 0; i < varargs.size(); ++i)
- if (varargs[i] != other.varargs[i])
- return false;
-
- return true;
- }
- }
-
- bool operator!=(const Expression &other) const {
- return !(*this == other);
+ return true;
}
};
@@ -155,19 +92,7 @@ namespace {
uint32_t nextValueNumber;
- Expression::ExpressionOpcode getOpcode(CmpInst* C);
- Expression create_expression(BinaryOperator* BO);
- Expression create_expression(CmpInst* C);
- Expression create_expression(ShuffleVectorInst* V);
- Expression create_expression(ExtractElementInst* C);
- Expression create_expression(InsertElementInst* V);
- Expression create_expression(SelectInst* V);
- Expression create_expression(CastInst* C);
- Expression create_expression(GetElementPtrInst* G);
- Expression create_expression(CallInst* C);
- Expression create_expression(ExtractValueInst* C);
- Expression create_expression(InsertValueInst* C);
-
+ Expression create_expression(Instruction* I);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -176,7 +101,6 @@ namespace {
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
- unsigned size();
void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
AliasAnalysis *getAliasAnalysis() const { return AA; }
void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -189,11 +113,11 @@ namespace {
namespace llvm {
template <> struct DenseMapInfo<Expression> {
static inline Expression getEmptyKey() {
- return Expression(Expression::EMPTY);
+ return ~0U;
}
static inline Expression getTombstoneKey() {
- return Expression(Expression::TOMBSTONE);
+ return ~1U;
}
static unsigned getHashValue(const Expression e) {
@@ -205,20 +129,13 @@ template <> struct DenseMapInfo<Expression> {
for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
-
- hash = ((unsigned)((uintptr_t)e.function >> 4) ^
- (unsigned)((uintptr_t)e.function >> 9)) +
- hash * 37;
-
+
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
};
-
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
}
@@ -226,185 +143,27 @@ struct isPodLike<Expression> { static const bool value = true; };
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
- if (isa<ICmpInst>(C)) {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case ICmpInst::ICMP_EQ: return Expression::ICMPEQ;
- case ICmpInst::ICMP_NE: return Expression::ICMPNE;
- case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
- case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
- case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
- case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
- case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
- case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
- case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
- case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
- }
- } else {
- switch (C->getPredicate()) {
- default: // THIS SHOULD NEVER HAPPEN
- llvm_unreachable("Comparison with unknown predicate?");
- case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
- case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
- case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
- case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
- case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
- case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
- case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
- case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
- case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
- case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
- case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
- case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
- case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
- case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
- }
- }
-}
-
-Expression ValueTable::create_expression(CallInst* C) {
- Expression e;
-
- e.type = C->getType();
- e.function = C->getCalledFunction();
- e.opcode = Expression::CALL;
-
- CallSite CS(C);
- for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(BinaryOperator* BO) {
- Expression e;
- e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
- e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
- e.function = 0;
- e.type = BO->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());
-
- return e;
-}
-
-Expression ValueTable::create_expression(CmpInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.varargs.push_back(lookup_or_add(C->getOperand(1)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = getOpcode(C);
-
- return e;
-}
-
-Expression ValueTable::create_expression(CastInst* C) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(C->getOperand(0)));
- e.function = 0;
- e.type = C->getType();
- e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());
-
- return e;
-}
-
-Expression ValueTable::create_expression(ShuffleVectorInst* S) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(S->getOperand(0)));
- e.varargs.push_back(lookup_or_add(S->getOperand(1)));
- e.varargs.push_back(lookup_or_add(S->getOperand(2)));
- e.function = 0;
- e.type = S->getType();
- e.opcode = Expression::SHUFFLE;
-
- return e;
-}
-Expression ValueTable::create_expression(ExtractElementInst* E) {
+Expression ValueTable::create_expression(Instruction *I) {
Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getOperand(0)));
- e.varargs.push_back(lookup_or_add(E->getOperand(1)));
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertElementInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getOperand(0)));
- e.varargs.push_back(lookup_or_add(I->getOperand(1)));
- e.varargs.push_back(lookup_or_add(I->getOperand(2)));
- e.function = 0;
e.type = I->getType();
- e.opcode = Expression::INSERT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(SelectInst* I) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(I->getCondition()));
- e.varargs.push_back(lookup_or_add(I->getTrueValue()));
- e.varargs.push_back(lookup_or_add(I->getFalseValue()));
- e.function = 0;
- e.type = I->getType();
- e.opcode = Expression::SELECT;
-
- return e;
-}
-
-Expression ValueTable::create_expression(GetElementPtrInst* G) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
- e.function = 0;
- e.type = G->getType();
- e.opcode = Expression::GEP;
-
- for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
- I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
-
- return e;
-}
-
-Expression ValueTable::create_expression(ExtractValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::EXTRACTVALUE;
-
- return e;
-}
-
-Expression ValueTable::create_expression(InsertValueInst* E) {
- Expression e;
-
- e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
- e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
- for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
- e.function = 0;
- e.type = E->getType();
- e.opcode = Expression::INSERTVALUE;
-
+ e.opcode = I->getOpcode();
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
+ for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
+ for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+ }
+
return e;
}
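
The rewritten create_expression above collapses the old per-instruction-class variants into one flattened form: an opcode, the result type, and the value numbers of the operands, with the compare predicate packed into the opcode and aggregate indices appended to the operand list. A rough standalone illustration of why that is enough for value numbering, using std::unordered_map in place of DenseMap and made-up numeric stand-ins for opcodes and types (not the patch's actual values):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

struct Expr {
  uint32_t Opcode;                 // opcode (predicate packed in for compares)
  uint32_t TypeID;                 // stand-in for the result type
  std::vector<uint32_t> VarArgs;   // value numbers of the operands
  bool operator==(const Expr &O) const {
    return Opcode == O.Opcode && TypeID == O.TypeID && VarArgs == O.VarArgs;
  }
};

struct ExprHash {
  std::size_t operator()(const Expr &E) const {
    std::size_t H = E.Opcode ^ (E.TypeID * 37);
    for (uint32_t V : E.VarArgs)
      H = V + H * 37;               // same shape as the hash in the hunk above
    return H;
  }
};

int main() {
  std::unordered_map<Expr, uint32_t, ExprHash> ValueNumbers;
  uint32_t NextValueNumber = 3;     // pretend the operands already took 1 and 2

  auto lookupOrAdd = [&](const Expr &E) -> uint32_t {
    auto It = ValueNumbers.find(E);
    if (It != ValueNumbers.end())
      return It->second;
    ValueNumbers[E] = NextValueNumber;
    return NextValueNumber++;
  };

  // Two adds over the same operand value numbers flatten to equal expressions
  // and get the same number; swapping the operands does not.
  Expr A{13, 32, {1, 2}};
  Expr B{13, 32, {1, 2}};
  Expr C{13, 32, {2, 1}};
  uint32_t NA = lookupOrAdd(A), NB = lookupOrAdd(B), NC = lookupOrAdd(C);
  std::cout << NA << ' ' << NB << ' ' << NC << "\n"; // prints: 3 3 4
  return 0;
}
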
@@ -563,12 +322,8 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::And:
case Instruction::Or :
case Instruction::Xor:
- exp = create_expression(cast<BinaryOperator>(I));
- break;
case Instruction::ICmp:
case Instruction::FCmp:
- exp = create_expression(cast<CmpInst>(I));
- break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -581,28 +336,14 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
- exp = create_expression(cast<CastInst>(I));
- break;
case Instruction::Select:
- exp = create_expression(cast<SelectInst>(I));
- break;
case Instruction::ExtractElement:
- exp = create_expression(cast<ExtractElementInst>(I));
- break;
case Instruction::InsertElement:
- exp = create_expression(cast<InsertElementInst>(I));
- break;
case Instruction::ShuffleVector:
- exp = create_expression(cast<ShuffleVectorInst>(I));
- break;
case Instruction::ExtractValue:
- exp = create_expression(cast<ExtractValueInst>(I));
- break;
case Instruction::InsertValue:
- exp = create_expression(cast<InsertValueInst>(I));
- break;
case Instruction::GetElementPtr:
- exp = create_expression(cast<GetElementPtrInst>(I));
+ exp = create_expression(I);
break;
default:
valueNumbering[V] = nextValueNumber;
@@ -649,30 +390,76 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
- struct ValueNumberScope {
- ValueNumberScope* parent;
- DenseMap<uint32_t, Value*> table;
-
- ValueNumberScope(ValueNumberScope* p) : parent(p) { }
- };
-}
-
-namespace {
class GVN : public FunctionPass {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ initializeGVNPass(*PassRegistry::getPassRegistry());
+ }
private:
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
+ const TargetData* TD;
ValueTable VN;
- DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+ /// LeaderTable - A mapping from value numbers to lists of Value*'s that
+ /// have that value number. Use findLeader to query it.
+ struct LeaderTableEntry {
+ Value *Val;
+ BasicBlock *BB;
+ LeaderTableEntry *Next;
+ };
+ DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
+ BumpPtrAllocator TableAllocator;
+
+ /// addToLeaderTable - Push a new Value onto the LeaderTable list for its
+ /// value number.

+ void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry& Curr = LeaderTable[N];
+ if (!Curr.Val) {
+ Curr.Val = V;
+ Curr.BB = BB;
+ return;
+ }
+
+ LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>();
+ Node->Val = V;
+ Node->BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+ }
+
+ /// removeFromLeaderTable - Scan the list of values corresponding to a given
+ /// value number, and remove the given value if encountered.
+ void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Curr = &LeaderTable[N];
+
+ while (Curr->Val != V || Curr->BB != BB) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Val = 0;
+ Curr->BB = 0;
+ } else {
+ LeaderTableEntry* Next = Curr->Next;
+ Curr->Val = Next->Val;
+ Curr->BB = Next->BB;
+ Curr->Next = Next->Next;
+ }
+ }
+ }
// List of critical edges to be split between iterations.
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
@@ -699,9 +486,8 @@ namespace {
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*>& d);
bool iterateOnFunction(Function &F);
- Value *CollapsePhi(PHINode* p);
bool performPRE(Function& F);
- Value *lookupNumber(BasicBlock *BB, uint32_t num);
+ Value *findLeader(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
@@ -715,7 +501,11 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
-INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -727,33 +517,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "}\n";
}
-static bool isSafeReplacement(PHINode* p, Instruction *inst) {
- if (!isa<PHINode>(inst))
- return true;
-
- for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
- UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
- if (use_phi->getParent() == inst->getParent())
- return false;
-
- return true;
-}
-
-Value *GVN::CollapsePhi(PHINode *PN) {
- Value *ConstVal = PN->hasConstantValue(DT);
- if (!ConstVal) return 0;
-
- Instruction *Inst = dyn_cast<Instruction>(ConstVal);
- if (!Inst)
- return ConstVal;
-
- if (DT->dominates(Inst, PN))
- if (isSafeReplacement(PN, Inst))
- return Inst;
- return 0;
-}
-
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
@@ -937,47 +700,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
-/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
-/// be expressed as a base pointer plus a constant offset. Return the base and
-/// offset to the caller.
-static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const TargetData &TD) {
- Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
-
- // Just look through bitcasts.
- if (PtrOp->getOpcode() == Instruction::BitCast)
- return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
- // If this is a GEP with constant indices, we can look through it.
- GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
- if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
- ++I, ++GTI) {
- ConstantInt *OpC = cast<ConstantInt>(*I);
- if (OpC->isZero()) continue;
-
- // Handle a struct and array indices which add their offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- } else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
- Offset += OpC->getSExtValue()*Size;
- }
- }
-
- // Re-sign extend from the pointer size if needed to get overflow edge cases
- // right.
- unsigned PtrSize = TD.getPointerSizeInBits();
- if (PtrSize < 64)
- Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
-
- return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
-}
-
-
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
@@ -996,9 +718,8 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
- Value *LoadBase =
- GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
@@ -1020,8 +741,6 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
- // remove this check, as it is duplicated with what we have below.
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -1067,12 +786,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getOperand(0)->getType()->isStructTy() ||
- DepSI->getOperand(0)->getType()->isArrayTy())
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
StorePtr, StoreSize, TD);
}
@@ -1099,7 +818,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1331,6 +1050,15 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
+
+ // Now that we've copied information to the new PHIs, scan through
+ // them again and inform alias analysis that we've added potentially
+ // escaping uses to any values that are operands to these PHIs.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+ PHINode *P = NewPHIs[i];
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
+ AA->addEscapingUse(P->getOperandUse(2*ii));
+ }
return V;
}
@@ -1347,8 +1075,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
SmallVector<NonLocalDepResult, 64> Deps;
- MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
- Deps);
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
//DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
// << Deps.size() << *LI << '\n');
@@ -1376,8 +1104,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
SmallVector<BasicBlock*, 16> UnavailableBlocks;
- const TargetData *TD = 0;
-
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1392,14 +1118,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- DepSI->getOperand(0),
+ DepSI->getValueOperand(),
Offset));
continue;
}
@@ -1409,8 +1133,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
if (TD && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, *TD);
@@ -1440,13 +1162,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
- if (S->getOperand(0)->getType() != LI->getType()) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
-
+ if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+ if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
LI->getType(), *TD)) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -1454,16 +1173,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- S->getOperand(0)));
+ S->getValueOperand()));
continue;
}
if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
// If the types mismatch and we can't handle it, reject reuse of the load.
if (LD->getType() != LI->getType()) {
- if (TD == 0)
- TD = getAnalysisIfAvailable<TargetData>();
-
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
@@ -1533,26 +1249,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+
+ // If any of these blocks has more than one successor (i.e. if the edge we
+ // just traversed was critical), then there are other paths through this
+ // block along which the load may not be anticipated. Hoisting the load
+ // above this block would be adding the load to execution paths along
+ // which it was not previously executed.
if (TmpBB->getTerminator()->getNumSuccessors() != 1)
- allSingleSucc = false;
+ return false;
}
assert(TmpBB);
LoadBB = TmpBB;
- // If we have a repl set with LI itself in it, this means we have a loop where
- // at least one of the values is LI. Since this means that we won't be able
- // to eliminate LI even if we insert uses in the other predecessors, we will
- // end up increasing code size. Reject this by scanning for LI.
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- if (ValuesPerBlock[i].isSimpleValue() &&
- ValuesPerBlock[i].getSimpleValue() == LI) {
- // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
- if (!EnableFullLoadPRE || e == 1)
- return false;
- }
- }
-
// FIXME: It is extremely unclear what this loop is doing, other than
// artificially restricting loadpre.
if (isSinglePred) {
@@ -1612,14 +1321,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
unsigned NumUnavailablePreds = PredLoads.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should be eliminated above!");
- if (!EnableFullLoadPRE) {
- // If this load is unavailable in multiple predecessors, reject it.
- // FIXME: If we could restructure the CFG, we could make a common pred with
- // all the preds that don't have an available LI and insert a new load into
- // that one block.
- if (NumUnavailablePreds != 1)
+
+ // If this load is unavailable in multiple predecessors, reject it.
+ // FIXME: If we could restructure the CFG, we could make a common pred with
+ // all the preds that don't have an available LI and insert a new load into
+ // that one block.
+ if (NumUnavailablePreds != 1)
return false;
- }
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
@@ -1634,7 +1342,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getOperand(0), TD);
+ PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
if (allSingleSucc) {
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
@@ -1648,7 +1356,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// we fail PRE.
if (LoadPtr == 0) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
- << *LI->getOperand(0) << "\n");
+ << *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
@@ -1657,8 +1365,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// @1 = getelementptr (i8* p, ...
// test p and branch if == 0
// load @1
- // It is valid to have the getelementptr before the test, even if p can be 0,
- // as getelementptr only does address arithmetic.
+ // It is valid to have the getelementptr before the test, even if p can
+ // be 0, as getelementptr only does address arithmetic.
// If we are not pushing the value through any multiple-successor blocks
// we do not have this case. Otherwise, check that the load is safe to
// put anywhere; this can be improved, but should be conservatively safe.
@@ -1675,8 +1383,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
if (!CanDoPRE) {
- while (!NewInsts.empty())
- NewInsts.pop_back_val()->eraseFromParent();
+ while (!NewInsts.empty()) {
+ Instruction *I = NewInsts.pop_back_val();
+ if (MD) MD->removeInstruction(I);
+ I->eraseFromParent();
+ }
return false;
}
@@ -1702,9 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *UnavailablePred = I->first;
Value *LoadPtr = I->second;
- Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
- LI->getAlignment(),
- UnavailablePred->getTerminator());
+ Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+ LI->getAlignment(),
+ UnavailablePred->getTerminator());
+
+ // Transfer the old load's TBAA tag to the new load.
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+ NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
@@ -1753,19 +1468,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// access code.
Value *AvailVal = 0;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
- if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ if (TD) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
DepSI, *TD);
if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+ AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
L->getType(), L, *TD);
}
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ if (TD) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
DepMI, *TD);
@@ -1804,14 +1519,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
- Value *StoredVal = DepSI->getOperand(0);
+ Value *StoredVal = DepSI->getValueOperand();
// The store and load are to a must-aliased pointer, but they may not
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
- const TargetData *TD = 0;
if (StoredVal->getType() != L->getType()) {
- if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ if (TD) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *TD);
if (StoredVal == 0)
@@ -1840,9 +1554,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// The loads are of a must-aliased pointer, but they may not actually have
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
- const TargetData *TD = 0;
if (DepLI->getType() != L->getType()) {
- if ((TD = getAnalysisIfAvailable<TargetData>())) {
+ if (TD) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
if (AvailableVal == 0)
return false;
@@ -1890,20 +1603,32 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
return false;
}
-Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
- DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
- if (I == localAvail.end())
- return 0;
-
- ValueNumberScope *Locals = I->second;
- while (Locals) {
- DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
- if (I != Locals->table.end())
- return I->second;
- Locals = Locals->parent;
+// findLeader - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question. This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
+Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+ LeaderTableEntry Vals = LeaderTable[num];
+ if (!Vals.Val) return 0;
+
+ Value *Val = 0;
+ if (DT->dominates(Vals.BB, BB)) {
+ Val = Vals.Val;
+ if (isa<Constant>(Val)) return Val;
+ }
+
+ LeaderTableEntry* Next = Vals.Next;
+ while (Next) {
+ if (DT->dominates(Next->BB, BB)) {
+ if (isa<Constant>(Next->Val)) return Next->Val;
+ if (!Val) Val = Next->Val;
+ }
+
+ Next = Next->Next;
}
- return 0;
+ return Val;
}
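
findLeader above replaces the old per-block ValueNumberScope chain: every value number now maps to a single list of (value, defining block) pairs, and a query walks that list keeping the first entry whose block dominates the query block, returning immediately for constants. A toy sketch of the same lookup, with a hard-coded dominance predicate standing in for the dominator tree (all identifiers here are illustrative):

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct LeaderEntry {
  std::string Val;    // the available value
  std::string BB;     // the block it is defined in
  bool IsConstant;
};

using DominatesFn =
    std::function<bool(const std::string &, const std::string &)>;

std::string findLeader(const std::vector<LeaderEntry> &Entries,
                       const std::string &BB, const DominatesFn &Dominates) {
  std::string Result;
  for (const LeaderEntry &E : Entries) {
    if (!Dominates(E.BB, BB))
      continue;
    if (E.IsConstant)
      return E.Val;            // constants are always the best leader
    if (Result.empty())
      Result = E.Val;          // otherwise keep the first dominating definition
  }
  return Result;               // empty string == no leader available here
}

int main() {
  // Toy CFG: "entry" dominates every block; other blocks dominate only themselves.
  DominatesFn Dominates = [](const std::string &A, const std::string &B) {
    return A == "entry" || A == B;
  };
  std::unordered_map<uint32_t, std::vector<LeaderEntry>> LeaderTable;
  LeaderTable[7] = {{"%x.then", "then", false}, {"%x.entry", "entry", false}};
  std::cout << findLeader(LeaderTable[7], "merge", Dominates) << "\n"; // %x.entry
  std::cout << findLeader(LeaderTable[7], "then", Dominates) << "\n";  // %x.then
  return 0;
}
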
@@ -1915,85 +1640,92 @@ bool GVN::processInstruction(Instruction *I,
if (isa<DbgInfoIntrinsic>(I))
return false;
+ // If the instruction can be easily simplified then do so now in preference
+ // to value numbering it. Value numbering often exposes redundancies, for
+ // example if it determines that %y is equal to %x then the instruction
+ // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ I->replaceAllUsesWith(V);
+ if (MD && V->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ VN.erase(I);
+ toErase.push_back(I);
+ return true;
+ }
+
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
bool Changed = processLoad(LI, toErase);
if (!Changed) {
unsigned Num = VN.lookup_or_add(LI);
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+ addToLeaderTable(Num, LI, LI->getParent());
}
return Changed;
}
- uint32_t NextNum = VN.getNextUnusedValueNumber();
- unsigned Num = VN.lookup_or_add(I);
-
+ // For conditional branches, we can perform simple conditional propagation on
+ // the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
return false;
-
+
Value *BranchCond = BI->getCondition();
uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
+
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
-
+
if (TrueSucc->getSinglePredecessor())
- localAvail[TrueSucc]->table[CondVN] =
- ConstantInt::getTrue(TrueSucc->getContext());
+ addToLeaderTable(CondVN,
+ ConstantInt::getTrue(TrueSucc->getContext()),
+ TrueSucc);
if (FalseSucc->getSinglePredecessor())
- localAvail[FalseSucc]->table[CondVN] =
- ConstantInt::getFalse(TrueSucc->getContext());
-
+ addToLeaderTable(CondVN,
+ ConstantInt::getFalse(TrueSucc->getContext()),
+ FalseSucc);
+
return false;
+ }
+
+ // Instructions with void type don't return a value, so there's
+ // no point in trying to find redundancies in them.
+ if (I->getType()->isVoidTy()) return false;
+
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ unsigned Num = VN.lookup_or_add(I);
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
- } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+ addToLeaderTable(Num, I, I->getParent());
return false;
}
- // Collapse PHI nodes
- if (PHINode* p = dyn_cast<PHINode>(I)) {
- Value *constVal = CollapsePhi(p);
-
- if (constVal) {
- p->replaceAllUsesWith(constVal);
- if (MD && constVal->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(constVal);
- VN.erase(p);
-
- toErase.push_back(p);
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
- }
-
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- } else if (Num == NextNum) {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
+ if (Num == NextNum) {
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
+ }
+
// Perform fast-path value-number based elimination of values inherited from
// dominators.
- } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
- // Remove it!
- VN.erase(I);
- I->replaceAllUsesWith(repl);
- if (MD && repl->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(repl);
- toErase.push_back(I);
- return true;
-
- } else {
- localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+ Value *repl = findLeader(I->getParent(), Num);
+ if (repl == 0) {
+ // Failure, just remember this instance for future use.
+ addToLeaderTable(Num, I, I->getParent());
+ return false;
}
-
- return false;
+
+ // Remove it!
+ VN.erase(I);
+ I->replaceAllUsesWith(repl);
+ if (MD && repl->getType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(repl);
+ toErase.push_back(I);
+ return true;
}
/// runOnFunction - This is the main transformation entry point for a function.
@@ -2001,6 +1733,7 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2011,8 +1744,8 @@ bool GVN::runOnFunction(Function& F) {
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = FI;
- ++FI;
+ BasicBlock *BB = FI++;
+
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
if (removedBlock) ++NumGVNBlocks;
@@ -2020,7 +1753,6 @@ bool GVN::runOnFunction(Function& F) {
}
unsigned Iteration = 0;
-
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
@@ -2138,20 +1870,19 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!localAvail.count(P)) {
+ } else if (!DT->dominates(&F.getEntryBlock(), P)) {
NumWithout = 2;
break;
}
- DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[P]->table.find(ValNo);
- if (predV == localAvail[P]->table.end()) {
+ Value* predV = findLeader(P, ValNo);
+ if (predV == 0) {
PREPred = P;
++NumWithout;
- } else if (predV->second == CurInst) {
+ } else if (predV == CurInst) {
NumWithout = 2;
} else {
- predMap[P] = predV->second;
+ predMap[P] = predV;
++NumWith;
}
}
@@ -2186,7 +1917,7 @@ bool GVN::performPRE(Function &F) {
if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
continue;
- if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+ if (Value *V = findLeader(PREPred, VN.lookup(Op))) {
PREInstr->setOperand(i, V);
} else {
success = false;
@@ -2210,7 +1941,7 @@ bool GVN::performPRE(Function &F) {
++NumGVNPRE;
// Update the availability map to include the new instruction.
- localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+ addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
PHINode* Phi = PHINode::Create(CurInst->getType(),
@@ -2223,12 +1954,21 @@ bool GVN::performPRE(Function &F) {
}
VN.add(Phi, ValNo);
- localAvail[CurrentBlock]->table[ValNo] = Phi;
+ addToLeaderTable(ValNo, Phi, CurrentBlock);
CurInst->replaceAllUsesWith(Phi);
- if (MD && Phi->getType()->isPointerTy())
- MD->invalidateCachedPointerInfo(Phi);
+ if (Phi->getType()->isPointerTy()) {
+ // Because we have added a PHI-use of the pointer value, it has now
+ // "escaped" from alias analysis' perspective. We need to inform
+ // AA of this.
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+
+ if (MD)
+ MD->invalidateCachedPointerInfo(Phi);
+ }
VN.erase(CurInst);
+ removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
@@ -2260,16 +2000,7 @@ bool GVN::splitCriticalEdges() {
/// iterateOnFunction - Executes one iteration of GVN
bool GVN::iterateOnFunction(Function &F) {
cleanupGlobalSets();
-
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
- if (DI->getIDom())
- localAvail[DI->getBlock()] =
- new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
- else
- localAvail[DI->getBlock()] = new ValueNumberScope(0);
- }
-
+
// Top-down walk of the dominator tree
bool Changed = false;
#if 0
@@ -2289,11 +2020,8 @@ bool GVN::iterateOnFunction(Function &F) {
void GVN::cleanupGlobalSets() {
VN.clear();
-
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
- delete I->second;
- localAvail.clear();
+ LeaderTable.clear();
+ TableAllocator.Reset();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
@@ -2303,17 +2031,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
- I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
- const ValueNumberScope *VNS = I->second;
-
- while (VNS) {
- for (DenseMap<uint32_t, Value*>::const_iterator
- II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
- assert(II->second != Inst && "Inst still in value numbering scope!");
- }
-
- VNS = VNS->parent;
+ for (DenseMap<uint32_t, LeaderTableEntry>::const_iterator
+ I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) {
+ const LeaderTableEntry *Node = &I->second;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
+
+ while (Node->Next) {
+ Node = Node->Next;
+ assert(Node->Val != Inst && "Inst still in value numbering scope!");
}
}
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index af2eafc..0fb6798 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID) {}
+ IndVarSimplify() : LoopPass(ID) {
+ initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -117,8 +119,16 @@ namespace {
}
char IndVarSimplify::ID = 0;
-INITIALIZE_PASS(IndVarSimplify, "indvars",
- "Canonicalize Induction Variables", false, false);
+INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_END(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false)
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -190,7 +200,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
}
// Expand the code for the iteration count.
- assert(RHS->isLoopInvariant(L) &&
+ assert(SE->isLoopInvariant(RHS, L) &&
"Computed iteration count is not loop invariant!");
Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
@@ -233,8 +243,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
-void IndVarSimplify::RewriteLoopExitValues(Loop *L,
- SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Verify the input to the pass in already in LCSSA form.
assert(L->isLCSSAForm(*DT));
@@ -292,7 +301,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!ExitValue->isLoopInvariant(L))
+ if (!SE->isLoopInvariant(ExitValue, L))
continue;
Changed = true;
@@ -338,7 +347,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
// If there are, change them into integer recurrences, permitting analysis by
// the SCEV routines.
//
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Header = L->getHeader();
SmallVector<WeakVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
@@ -346,7 +355,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
PHIs.push_back(PN);
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i]))
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
HandleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
@@ -395,7 +404,7 @@ void IndVarSimplify::EliminateIVComparisons() {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -462,7 +471,7 @@ void IndVarSimplify::EliminateIVRemainders() {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
@@ -607,9 +616,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L) {
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
// Loop-invariant values are safe.
- if (S->isLoopInvariant(L)) return true;
+ if (SE->isLoopInvariant(S, L)) return true;
// Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
// to transform them into efficient code.
@@ -620,18 +629,18 @@ static bool isSafe(const SCEV *S, const Loop *L) {
if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L)) return false;
+ if (!isSafe(*I, L, SE)) return false;
return true;
}
// A cast is safe if its operand is.
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L);
+ return isSafe(C->getOperand(), L, SE);
// A udiv is safe if its operands are.
if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L) &&
- isSafe(UD->getRHS(), L);
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
// SCEVUnknown is always safe.
if (isa<SCEVUnknown>(S))
@@ -662,7 +671,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// Evaluate the expression out of the loop, if possible.
if (!L->contains(UI->getUser())) {
const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (ExitVal->isLoopInvariant(L))
+ if (SE->isLoopInvariant(ExitVal, L))
AR = ExitVal;
}
@@ -672,7 +681,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// currently can only reduce affine polynomials. For now just disable
// indvar subst on anything more complex than an affine addrec, unless
// it can be expanded to a trivial value.
- if (!isSafe(AR, L))
+ if (!isSafe(AR, L, SE))
continue;
// Determine the insertion point for this user. By default, insert
@@ -725,7 +734,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
// which are now dead.
while (!DeadInsts.empty())
if (Instruction *Inst =
- dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
+ dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 104d5ae..90094a8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -40,20 +40,22 @@ STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
static cl::opt<unsigned>
-Threshold("jump-threading-threshold",
+Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
-// Turn on use of LazyValueInfo.
-static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi",
- cl::desc("Use LVI for jump threading"),
- cl::init(true),
- cl::ReallyHidden);
-
-
-
namespace {
+ // These are at global scope so static functions can use them too.
+ typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
+ typedef SmallVector<std::pair<Constant*, BasicBlock*>, 8> PredValueInfoTy;
+
+ // This is used to keep track of what kind of constant we're currently hoping
+ // to find.
+ enum ConstantPreference {
+ WantInteger,
+ WantBlockAddress
+ };
+
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
/// predecessors of the block can be proven to always jump to one of the
@@ -79,61 +81,59 @@ namespace {
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
-
+
// RAII helper for updating the recursion stack.
struct RecursionSetRemover {
DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
std::pair<Value*, BasicBlock*> ThePair;
-
+
RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
std::pair<Value*, BasicBlock*> P)
: TheSet(S), ThePair(P) { }
-
+
~RecursionSetRemover() {
TheSet.erase(ThePair);
}
};
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(ID) {}
+ JumpThreading() : FunctionPass(ID) {
+ initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI) {
- AU.addRequired<LazyValueInfo>();
- AU.addPreserved<LazyValueInfo>();
- }
+ AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
}
-
+
void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &PredBBs);
-
- typedef SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > PredValueInfo;
-
+
bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
- PredValueInfo &Result);
- bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
-
-
- bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
- bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
+ PredValueInfo &Result,
+ ConstantPreference Preference);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference);
bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
-
+
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
}
char JumpThreading::ID = 0;
-INITIALIZE_PASS(JumpThreading, "jump-threading",
- "Jump Threading", false, false);
+INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_END(JumpThreading, "jump-threading",
+ "Jump Threading", false, false)
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -143,21 +143,21 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
- LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
-
+ LVI = &getAnalysis<LazyValueInfo>();
+
FindLoopHeaders(F);
-
+
bool Changed, EverChanged = false;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
- // Thread all of the branches we can over this block.
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
-
+
++I;
-
+
// If the block is trivially dead, zap it. This eliminates the successor
// edges which simplifies the CFG.
if (pred_begin(BB) == pred_end(BB) &&
@@ -165,48 +165,46 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
- if (LVI) LVI->eraseBlock(BB);
+ LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // Can't thread an unconditional jump, but if the block is "almost
- // empty", we can replace uses of it with uses of the successor and make
- // this dead.
- if (BI->isUnconditional() &&
- BB != &BB->getParent()->getEntryBlock()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
+ continue;
+ }
+
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI && BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- if (BBI->isTerminator()) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // FIXME: It is always conservatively correct to drop the info
- // for a block even if it doesn't get erased. This isn't totally
- // awesome, but it allows us to use AssertingVH to prevent nasty
- // dangling pointer issues within LazyValueInfo.
- if (LVI) LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
- Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
- }
+ BB->getFirstNonPHIOrDbg()->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward if needed.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+ BasicBlock *Succ = BI->getSuccessor(0);
+
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ LVI->eraseBlock(BB);
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ Changed = true;
+ // If we deleted BB and BB was the header of a loop, then the
+ // successor is now the header of the loop.
+ BB = Succ;
}
+
+ if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
} while (Changed);
-
+
LoopHeaders.clear();
return EverChanged;
}
@@ -216,25 +214,25 @@ bool JumpThreading::runOnFunction(Function &F) {
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
+
// FIXME: THREADING will delete values that are just used to compute the
// branch, so they shouldn't count against the duplication cost.
-
-
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
for (; !isa<TerminatorInst>(I); ++I) {
// Debugger intrinsics don't incur code size.
if (isa<DbgInfoIntrinsic>(I)) continue;
-
+
// If this is a pointer->pointer bitcast, it is free.
if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
-
+
// All other instructions count for at least one unit.
++Size;
-
+
// Calls are more expensive. If they are non-intrinsic calls, we model them
// as having cost of 4. If they are a non-vector intrinsic, we model them
// as having cost of 2 total, and if they are a vector intrinsic, we model
@@ -246,12 +244,16 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
Size += 1;
}
}
-
+
// Threading through a switch statement is particularly profitable. If this
// block ends in a switch, decrease its cost to make it more likely to happen.
if (isa<SwitchInst>(I))
Size = Size > 6 ? Size-6 : 0;
-
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(I))
+ Size = Size > 8 ? Size-8 : 0;
+
return Size;
}
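
The duplication-cost heuristic above is easier to follow with a small worked model. The standalone C++ sketch below is illustrative only and not part of this patch: it restates just the rules visible in this hunk (debugger intrinsics and pointer-to-pointer bitcasts are free, every other instruction costs one unit, and a block ending in a switch or indirect branch gets a discount of 6 or 8 units, floored at zero). The call-cost cases elided between hunks are not modelled, and the Kind/Term enums and function names are invented for illustration.

// Hypothetical model of the duplication-cost rules above; not an LLVM API.
#include <cstddef>
#include <cstdio>
#include <vector>

enum Kind { DbgIntrinsic, PtrBitCast, Ordinary };
enum Term { PlainBranch, SwitchTerm, IndirectBrTerm };

// Mirrors getJumpThreadDuplicationCost: sum the non-free instructions, then
// apply the terminator discount (switch: 6 units, indirect branch: 8 units).
static unsigned duplicationCost(const std::vector<Kind> &Insts, Term T) {
  unsigned Size = 0;
  for (std::size_t i = 0, e = Insts.size(); i != e; ++i) {
    if (Insts[i] == DbgIntrinsic) continue; // debugger intrinsics are free
    if (Insts[i] == PtrBitCast) continue;   // pointer->pointer bitcasts are free
    ++Size;                                 // everything else costs one unit
  }
  if (T == SwitchTerm)
    Size = Size > 6 ? Size - 6 : 0;
  else if (T == IndirectBrTerm)
    Size = Size > 8 ? Size - 8 : 0;
  return Size;
}

int main() {
  std::vector<Kind> Insts;
  Insts.push_back(DbgIntrinsic); // free
  Insts.push_back(PtrBitCast);   // free
  Insts.push_back(Ordinary);     // 1 unit
  Insts.push_back(Ordinary);     // 1 unit
  // The raw cost of 2 is fully absorbed by the switch discount, so the block
  // is considered cheap enough to duplicate.
  std::printf("cost = %u\n", duplicationCost(Insts, SwitchTerm));
  return 0;
}
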
@@ -273,57 +275,64 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
-
+
for (unsigned i = 0, e = Edges.size(); i != e; ++i)
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-// Helper method for ComputeValueKnownInPredecessors. If Value is a
-// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
-static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
- BasicBlock*> > &Result,
- Constant *Value, BasicBlock* BB){
- if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
- Result.push_back(std::make_pair(FoldedCInt, BB));
- else if (isa<UndefValue>(Value))
- Result.push_back(std::make_pair((ConstantInt*)0, BB));
+/// getKnownConstant - Helper method to determine if we can thread over a
+/// terminator with the given value as its condition, and if so what value to
+/// use for that. What kind of value this is depends on whether we want an
+/// integer or a block address, but an undef is always accepted.
+/// Returns null if Val is null or not an appropriate constant.
+static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
+ if (!Val)
+ return 0;
+
+ // Undef is "known" enough.
+ if (UndefValue *U = dyn_cast<UndefValue>(Val))
+ return U;
+
+ if (Preference == WantBlockAddress)
+ return dyn_cast<BlockAddress>(Val->stripPointerCasts());
+
+ return dyn_cast<ConstantInt>(Val);
}
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
-/// if we can infer that the value is a known ConstantInt in any of our
-/// predecessors. If so, return the known list of value and pred BB in the
-/// result vector. If a value is known to be undef, it is returned as null.
+/// if we can infer that the value is a known ConstantInt/BlockAddress or undef
+/// in any of our predecessors. If so, return the known list of value and pred
+/// BB in the result vector.
///
/// This returns true if there were any known values.
///
bool JumpThreading::
-ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
+ ConstantPreference Preference) {
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
// prevent this, keep track of what (value, block) pairs we've already visited
// and terminate the search if we loop back to them
if (!RecursionSet.insert(std::make_pair(V, BB)).second)
return false;
-
+
   // An RAII helper to remove this pair from the recursion set once the recursion
// stack pops back out again.
RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
-
- // If V is a constantint, then it is known in all predecessors.
- if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(V);
-
+
+ // If V is a constant, then it is known in all predecessors.
+ if (Constant *KC = getKnownConstant(V, Preference)) {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CI, *PI));
-
+ Result.push_back(std::make_pair(KC, *PI));
+
return true;
}
-
+
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0 || I->getParent() != BB) {
-
+
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
//
@@ -331,82 +340,78 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
/// TODO: Per PR2563, we could infer value range information about a
/// predecessor based on its terminator.
//
- if (LVI) {
- // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
- // "I" is a non-local compare-with-a-constant instruction. This would be
- // able to handle value inequalities better, for example if the compare is
- // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
- // Perhaps getConstantOnEdge should be smart enough to do this?
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
- if (PredCst == 0 ||
- (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
- continue;
-
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
- }
-
- return !Result.empty();
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
+ if (Constant *KC = getKnownConstant(PredCst, Preference))
+ Result.push_back(std::make_pair(KC, P));
}
-
- return false;
+
+ return !Result.empty();
}
-
+
/// If I is a PHI node, then we know the incoming values for any constants.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
- if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
- ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
- Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
- } else if (LVI) {
+ if (Constant *KC = getKnownConstant(InVal, Preference)) {
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
+ } else {
Constant *CI = LVI->getConstantOnEdge(InVal,
PN->getIncomingBlock(i), BB);
- // LVI returns null is no value could be determined.
- if (!CI) continue;
- PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
+ if (Constant *KC = getKnownConstant(CI, Preference))
+ Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i)));
}
}
-
+
return !Result.empty();
}
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ PredValueInfoTy LHSVals, RHSVals;
// Handle some boolean conditions.
- if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ assert(Preference == WantInteger && "One-bit non-integer type?");
// X | true -> true
// X & false -> false
if (I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
- ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
-
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals,
+ WantInteger);
+
if (LHSVals.empty() && RHSVals.empty())
return false;
-
+
ConstantInt *InterestingVal;
if (I->getOpcode() == Instruction::Or)
InterestingVal = ConstantInt::getTrue(I->getContext());
else
InterestingVal = ConstantInt::getFalse(I->getContext());
-
+
SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
-
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
+ if (LHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(LHSVals[i].first)) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
+ if (RHSVals[i].first == InterestingVal ||
+ isa<UndefValue>(RHSVals[i].first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
if (!LHSKnownBBs.count(RHSVals[i].second)) {
@@ -414,48 +419,51 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Result.back().first = InterestingVal;
}
}
-
+
return !Result.empty();
}
-
+
// Handle the NOT form of XOR.
if (I->getOpcode() == Instruction::Xor &&
isa<ConstantInt>(I->getOperand(1)) &&
cast<ConstantInt>(I->getOperand(1))->isOne()) {
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result,
+ WantInteger);
if (Result.empty())
return false;
// Invert the known values.
for (unsigned i = 0, e = Result.size(); i != e; ++i)
- if (Result[i].first)
- Result[i].first =
- cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
-
+ Result[i].first = ConstantExpr::getNot(Result[i].first);
+
return true;
}
-
+
// Try to simplify some other binary operator values.
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ assert(Preference != WantBlockAddress
+ && "A binary operator creating a block address?");
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals,
+ WantInteger);
+
// Try to use constant folding to simplify the binary operator.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first ? LHSVals[i].first :
- cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
-
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
}
-
+
return !Result.empty();
}
-
+
// Handle compare with phi operand, where the PHI is defined in this block.
if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
// We can do this simplification if any comparisons fold to true or false.
@@ -464,32 +472,31 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
BasicBlock *PredBB = PN->getIncomingBlock(i);
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
-
+
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
if (Res == 0) {
- if (!LVI || !isa<Constant>(RHS))
+ if (!isa<Constant>(RHS))
continue;
-
- LazyValueInfo::Tristate
+
+ LazyValueInfo::Tristate
ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
cast<Constant>(RHS), PredBB, BB);
if (ResT == LazyValueInfo::Unknown)
continue;
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
-
- if (Constant *ConstRes = dyn_cast<Constant>(Res))
- PushConstantIntOrUndef(Result, ConstRes, PredBB);
+
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.push_back(std::make_pair(KC, PredBB));
}
-
+
return !Result.empty();
}
-
-
+
+
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
- if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy()) {
+ if (isa<Constant>(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) {
if (!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
@@ -505,44 +512,74 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
continue;
Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ Result.push_back(std::make_pair(ResC, P));
}
return !Result.empty();
}
-
+
// Try to find a constant value for the LHS of a comparison,
// and evaluate it statically if we can.
if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
- ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
-
+ PredValueInfoTy LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
+ WantInteger);
+
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first ? LHSVals[i].first :
- cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *V = LHSVals[i].first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
- PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ if (Constant *KC = getKnownConstant(Folded, WantInteger))
+ Result.push_back(std::make_pair(KC, LHSVals[i].second));
}
-
+
return !Result.empty();
}
}
}
-
- if (LVI) {
- // If all else fails, see if LVI can figure out a constant value for us.
- Constant *CI = LVI->getConstant(V, BB);
- ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
- if (CInt) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(CInt, *PI));
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ // Handle select instructions where at least one operand is a known constant
+ // and we can figure out the condition value for any predecessor block.
+ Constant *TrueVal = getKnownConstant(SI->getTrueValue(), Preference);
+ Constant *FalseVal = getKnownConstant(SI->getFalseValue(), Preference);
+ PredValueInfoTy Conds;
+ if ((TrueVal || FalseVal) &&
+ ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
+ WantInteger)) {
+ for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
+ Constant *Cond = Conds[i].first;
+
+ // Figure out what value to use for the condition.
+ bool KnownCond;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Cond)) {
+ // A known boolean.
+ KnownCond = CI->isOne();
+ } else {
+ assert(isa<UndefValue>(Cond) && "Unexpected condition value");
+ // Either operand will do, so be sure to pick the one that's a known
+ // constant.
+ // FIXME: Do this more cleverly if both values are known constants?
+ KnownCond = (TrueVal != 0);
+ }
+
+ // See if the select has a known constant value for this predecessor.
+ if (Constant *Val = KnownCond ? TrueVal : FalseVal)
+ Result.push_back(std::make_pair(Val, Conds[i].second));
+ }
+
+ return !Result.empty();
}
-
- return !Result.empty();
}
-
- return false;
+
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ if (Constant *KC = getKnownConstant(CI, Preference)) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(KC, *PI));
+ }
+
+ return !Result.empty();
}
@@ -565,10 +602,20 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
if (NumPreds < MinNumPreds)
MinSucc = i;
}
-
+
return MinSucc;
}
+static bool hasAddressTakenAndUsed(BasicBlock *BB) {
+ if (!BB->hasAddressTaken()) return false;
+
+ // If the block has its address taken, it may be a tree of dead constants
+ // hanging off of it. These shouldn't keep the block alive.
+ BlockAddress *BA = BlockAddress::get(BB);
+ BA->removeDeadConstantUsers();
+ return !BA->use_empty();
+}
+
/// ProcessBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ProcessBlock(BasicBlock *BB) {
@@ -577,167 +624,122 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (pred_begin(BB) == pred_end(BB) &&
BB != &BB->getParent()->getEntryBlock())
return false;
-
+
// If this block has a single predecessor, and if that pred has a single
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
- SinglePred != BB) {
+ SinglePred != BB && !hasAddressTakenAndUsed(BB)) {
// If SinglePred was a loop header, BB becomes one.
if (LoopHeaders.erase(SinglePred))
LoopHeaders.insert(BB);
-
+
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- if (LVI) LVI->eraseBlock(SinglePred);
+ LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
-
+
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
}
- // Look to see if the terminator is a branch of switch, if not we can't thread
- // it.
+ // What kind of constant we're looking for.
+ ConstantPreference Preference = WantInteger;
+
+ // Look to see if the terminator is a conditional branch, switch or indirect
+ // branch, if not we can't thread it.
Value *Condition;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ Instruction *Terminator = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Terminator)) {
// Can't thread an unconditional jump.
if (BI->isUnconditional()) return false;
Condition = BI->getCondition();
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Terminator)) {
Condition = SI->getCondition();
- else
+ } else if (IndirectBrInst *IB = dyn_cast<IndirectBrInst>(Terminator)) {
+ Condition = IB->getAddress()->stripPointerCasts();
+ Preference = WantBlockAddress;
+ } else {
return false; // Must be an invoke.
-
- // If the terminator of this block is branching on a constant, simplify the
- // terminator to an unconditional branch. This can occur due to threading in
- // other blocks.
- if (isa<ConstantInt>(Condition)) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding terminator: " << *BB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(BB);
- return true;
}
-
+
// If the terminator is branching on an undef, we can pick any of the
// successors to branch to. Let GetBestDestForJumpOnUndef decide.
if (isa<UndefValue>(Condition)) {
unsigned BestSucc = GetBestDestForJumpOnUndef(BB);
-
+
// Fold the branch/switch.
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
+ BBTerm->getSuccessor(i)->removePredecessor(BB, true);
}
-
+
DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
return true;
}
-
- Instruction *CondInst = dyn_cast<Instruction>(Condition);
- // If the condition is an instruction defined in another block, see if a
- // predecessor has the same condition:
- // br COND, BBX, BBY
- // BBX:
- // br COND, BBZ, BBW
- if (!LVI &&
- !Condition->hasOneUse() && // Multiple uses.
- (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- } else {
- assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
- if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(P, BB))
- return true;
- }
- }
+ // If the terminator of this block is branching on a constant, simplify the
+ // terminator to an unconditional branch. This can occur due to threading in
+ // other blocks.
+ if (getKnownConstant(Condition, Preference)) {
+ DEBUG(dbgs() << " In block '" << BB->getName()
+ << "' folding terminator: " << *BB->getTerminator() << '\n');
+ ++NumFolds;
+ ConstantFoldTerminator(BB);
+ return true;
}
+ Instruction *CondInst = dyn_cast<Instruction>(Condition);
+
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0) {
// FIXME: Unify this with code below.
- if (LVI && ProcessThreadableEdges(Condition, BB))
+ if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
return false;
- }
-
-
+ }
+
+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (!LVI &&
- (!isa<PHINode>(CondCmp->getOperand(0)) ||
- cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with a lexically identical value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI) {
- BasicBlock *P = *PI;
- if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
- if (PBI->isConditional() && P != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(P, BB))
- return true;
- }
- }
- }
- }
- }
-
// For a comparison where the LHS is outside this block, it's possible
     // that we've branched on it before. Use LVI to see if we can simplify
// the branch based on that.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ if (CondBr && CondConst && CondBr->isConditional() && PI != PE &&
(!isa<Instruction>(CondCmp->getOperand(0)) ||
cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
// For predecessor edge, determine if the comparison is true or false
// on that edge. If they're all true or all false, we can simplify the
// branch.
// FIXME: We could handle mixed true/false by duplicating code.
- LazyValueInfo::Tristate Baseline =
+ LazyValueInfo::Tristate Baseline =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, *PI, BB);
if (Baseline != LazyValueInfo::Unknown) {
// Check that all remaining incoming values match the first one.
while (++PI != PE) {
- LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
- CondCmp->getPredicate(),
- CondCmp->getOperand(0),
- CondConst, *PI, BB);
+ LazyValueInfo::Tristate Ret =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(),
+ CondCmp->getOperand(0), CondConst, *PI, BB);
if (Ret != Baseline) break;
}
-
+
// If we terminated early, then one of the values didn't match.
if (PI == PE) {
unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
- RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true);
BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
CondBr->eraseFromParent();
return true;
@@ -755,174 +757,37 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (CmpInst *CondCmp = dyn_cast<CmpInst>(SimplifyValue))
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
-
+
// TODO: There are other places where load PRE would be profitable, such as
// more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
-
-
+
+
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
//
- if (ProcessThreadableEdges(CondInst, BB))
+ if (ProcessThreadableEdges(CondInst, BB, Preference))
return true;
-
+
// If this is an otherwise-unfoldable branch on a phi node in the current
// block, see if we can simplify.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN);
-
-
+
+
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
-
-
- // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
- // "(X == 4)", thread through this block.
-
- return false;
-}
-/// ProcessBranchOnDuplicateCond - We found a block and a predecessor of that
-/// block that jump on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// br COND, DESTBB, BBY
-/// DESTBB:
-/// br COND, BBZ, BBW
-///
-/// If DESTBB has multiple predecessors, we can't just constant fold the branch
-/// in DESTBB, we have to thread over it.
-bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *BB) {
- BranchInst *PredBI = cast<BranchInst>(PredBB->getTerminator());
-
- // If both successors of PredBB go to DESTBB, we don't know anything. We can
- // fold the branch to an unconditional one, which allows other recursive
- // simplifications.
- bool BranchDir;
- if (PredBI->getSuccessor(1) != BB)
- BranchDir = true;
- else if (PredBI->getSuccessor(0) != BB)
- BranchDir = false;
- else {
- DEBUG(dbgs() << " In block '" << PredBB->getName()
- << "' folding terminator: " << *PredBB->getTerminator() << '\n');
- ++NumFolds;
- ConstantFoldTerminator(PredBB);
- return true;
- }
-
- BranchInst *DestBI = cast<BranchInst>(BB->getTerminator());
- // If the dest block has one predecessor, just fix the branch condition to a
- // constant and fold it.
- if (BB->getSinglePredecessor()) {
- DEBUG(dbgs() << " In block '" << BB->getName()
- << "' folding condition to '" << BranchDir << "': "
- << *BB->getTerminator() << '\n');
- ++NumFolds;
- Value *OldCond = DestBI->getCondition();
- DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- BranchDir));
- // Delete dead instructions before we fold the branch. Folding the branch
- // can eliminate edges from the CFG which can end up deleting OldCond.
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- ConstantFoldTerminator(BB);
- return true;
- }
-
-
- // Next, figure out which successor we are threading to.
- BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
-
- SmallVector<BasicBlock*, 2> Preds;
- Preds.push_back(PredBB);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, Preds, SuccBB);
-}
-
-/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
-/// block that switch on exactly the same condition. This means that we almost
-/// always know the direction of the edge in the DESTBB:
-/// PREDBB:
-/// switch COND [... DESTBB, BBY ... ]
-/// DESTBB:
-/// switch COND [... BBZ, BBW ]
-///
-/// Optimizing switches like this is very important, because simplifycfg builds
-/// switches out of repeated 'if' conditions.
-bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
- BasicBlock *DestBB) {
- // Can't thread edge to self.
- if (PredBB == DestBB)
- return false;
-
- SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());
- SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator());
-
- // There are a variety of optimizations that we can potentially do on these
- // blocks: we order them from most to least preferable.
-
- // If DESTBB *just* contains the switch, then we can forward edges from PREDBB
- // directly to their destination. This does not introduce *any* code size
- // growth. Skip debug info first.
- BasicBlock::iterator BBI = DestBB->begin();
- while (isa<DbgInfoIntrinsic>(BBI))
- BBI++;
-
- // FIXME: Thread if it just contains a PHI.
- if (isa<SwitchInst>(BBI)) {
- bool MadeChange = false;
- // Ignore the default edge for now.
- for (unsigned i = 1, e = DestSI->getNumSuccessors(); i != e; ++i) {
- ConstantInt *DestVal = DestSI->getCaseValue(i);
- BasicBlock *DestSucc = DestSI->getSuccessor(i);
-
- // Okay, DestSI has a case for 'DestVal' that goes to 'DestSucc'. See if
- // PredSI has an explicit case for it. If so, forward. If it is covered
- // by the default case, we can't update PredSI.
- unsigned PredCase = PredSI->findCaseValue(DestVal);
- if (PredCase == 0) continue;
-
- // If PredSI doesn't go to DestBB on this value, then it won't reach the
- // case on this condition.
- if (PredSI->getSuccessor(PredCase) != DestBB &&
- DestSI->getSuccessor(i) != DestBB)
- continue;
-
- // Do not forward this if it already goes to this destination, this would
- // be an infinite loop.
- if (PredSI->getSuccessor(PredCase) == DestSucc)
- continue;
-
- // Otherwise, we're safe to make the change. Make sure that the edge from
- // DestSI to DestSucc is not critical and has no PHI nodes.
- DEBUG(dbgs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI);
- DEBUG(dbgs() << "THROUGH: " << *DestSI);
+ // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
+ // "(X == 4)", thread through this block.
- // If the destination has PHI nodes, just split the edge for updating
- // simplicity.
- if (isa<PHINode>(DestSucc->begin()) && !DestSucc->getSinglePredecessor()){
- SplitCriticalEdge(DestSI, i, this);
- DestSucc = DestSI->getSuccessor(i);
- }
- FoldSingleEntryPHINodes(DestSucc);
- PredSI->setSuccessor(PredCase, DestSucc);
- MadeChange = true;
- }
-
- if (MadeChange)
- return true;
- }
-
return false;
}
@@ -934,13 +799,13 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Don't hack volatile loads.
if (LI->isVolatile()) return false;
-
+
// If the load is defined in a block with exactly one predecessor, it can't be
// partially redundant.
BasicBlock *LoadBB = LI->getParent();
if (LoadBB->getSinglePredecessor())
return false;
-
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
@@ -948,17 +813,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
-
+
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal =
+ if (Value *AvailableVal =
FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
     // If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
-
+
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -972,13 +837,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// might clobber its value.
if (BBIt != LoadBB->begin())
return false;
-
-
+
+
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = 0;
-
+
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
@@ -996,23 +861,23 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
OneUnavailablePred = PredBB;
continue;
}
-
+
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable));
}
-
+
// If the loaded value isn't available in any predecessor, it isn't partially
// redundant.
if (AvailablePreds.empty()) return false;
-
+
// Okay, the loaded value is available in at least one (and maybe all!)
// predecessors. If the value is unavailable in more than one unique
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
BasicBlock *UnavailablePred = 0;
-
+
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
// unconditional branch, we know that it isn't a critical edge.
@@ -1035,17 +900,17 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
-
+
if (!AvailablePredSet.count(P))
PredsToSplit.push_back(P);
}
-
+
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
"thread-pre-split", this);
}
-
+
// If the value isn't available in all predecessors, then there will be
// exactly one where it isn't available. Insert a load on that edge and add
// it to the AvailablePreds list.
@@ -1057,35 +922,35 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
-
+
// Now we know that each predecessor of this block has a value in
// AvailablePreds, sort them for efficient access as we're walking the preds.
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
-
+
// Create a PHI node at the start of the block for the PRE'd load value.
PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin());
PN->takeName(LI);
-
+
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
BasicBlock *P = *PI;
- AvailablePredsTy::iterator I =
+ AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
std::make_pair(P, (Value*)0));
-
+
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
-
+
PN->addIncoming(I->second, I->first);
}
-
+
//cerr << "PRE: " << *LI << *PN << "\n";
-
+
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
-
+
return true;
}
@@ -1097,7 +962,7 @@ FindMostPopularDest(BasicBlock *BB,
const SmallVectorImpl<std::pair<BasicBlock*,
BasicBlock*> > &PredToDestList) {
assert(!PredToDestList.empty());
-
+
// Determine popularity. If there are multiple possible destinations, we
// explicitly choose to ignore 'undef' destinations. We prefer to thread
// blocks with known and real destinations to threading undef. We'll handle
@@ -1106,13 +971,13 @@ FindMostPopularDest(BasicBlock *BB,
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second)
DestPopularity[PredToDestList[i].second]++;
-
+
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
BasicBlock *MostPopularDest = DPI->first;
unsigned Popularity = DPI->second;
SmallVector<BasicBlock*, 4> SamePopularity;
-
+
for (++DPI; DPI != DestPopularity.end(); ++DPI) {
// If the popularity of this entry isn't higher than the popularity we've
// seen so far, ignore it.
@@ -1126,10 +991,10 @@ FindMostPopularDest(BasicBlock *BB,
SamePopularity.clear();
MostPopularDest = DPI->first;
Popularity = DPI->second;
- }
+ }
}
-
- // Okay, now we know the most popular destination. If there is more than
+
+ // Okay, now we know the most popular destination. If there is more than one
// destination, we need to determine one. This is arbitrary, but we need
// to make a deterministic decision. Pick the first one that appears in the
// successor list.
@@ -1138,105 +1003,105 @@ FindMostPopularDest(BasicBlock *BB,
TerminatorInst *TI = BB->getTerminator();
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
-
+
if (std::find(SamePopularity.begin(), SamePopularity.end(),
TI->getSuccessor(i)) == SamePopularity.end())
continue;
-
+
MostPopularDest = TI->getSuccessor(i);
break;
}
}
-
+
// Okay, we have finally picked the most popular destination.
return MostPopularDest;
}
-bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
+ ConstantPreference Preference) {
// If threading this would thread across a loop header, don't even try to
// thread the edge.
if (LoopHeaders.count(BB))
return false;
-
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+
+ PredValueInfoTy PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
return false;
-
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- dbgs() << " BB '" << BB->getName() << "': FOUND condition = ";
- if (PredValues[i].first)
- dbgs() << *PredValues[i].first;
- else
- dbgs() << "UNDEF";
- dbgs() << " for pred '" << PredValues[i].second->getName()
- << "'.\n";
+ dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
+ << *PredValues[i].first
+ << " for pred '" << PredValues[i].second->getName() << "'.\n";
});
-
+
// Decide what we want to thread through. Convert our list of known values to
// a list of known destinations for each pred. This also discards duplicate
// predecessors and keeps track of the undefined inputs (which are represented
// as a null dest in the PredToDestList).
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
-
+
BasicBlock *OnlyDest = 0;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
-
+
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
BasicBlock *Pred = PredValues[i].second;
if (!SeenPreds.insert(Pred))
continue; // Duplicate predecessor entry.
-
+
// If the predecessor ends with an indirect goto, we can't change its
// destination.
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
-
- ConstantInt *Val = PredValues[i].first;
-
+
+ Constant *Val = PredValues[i].first;
+
BasicBlock *DestBB;
- if (Val == 0) // Undef.
+ if (isa<UndefValue>(Val))
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- DestBB = BI->getSuccessor(Val->isZero());
+ DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
+ DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- DestBB = SI->getSuccessor(SI->findCaseValue(Val));
+ assert(isa<IndirectBrInst>(BB->getTerminator())
+ && "Unexpected terminator");
+ DestBB = cast<BlockAddress>(Val)->getBasicBlock();
}
// If we have exactly one destination, remember it for efficiency below.
- if (i == 0)
+ if (PredToDestList.empty())
OnlyDest = DestBB;
else if (OnlyDest != DestBB)
OnlyDest = MultipleDestSentinel;
-
+
PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
-
+
// If all edges were unthreadable, we fail.
if (PredToDestList.empty())
return false;
-
+
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to start by threading the batch that goes
// to the most popular destination first. If we only know about one
// threadable destination (the common case) we can avoid this.
BasicBlock *MostPopularDest = OnlyDest;
-
+
if (MostPopularDest == MultipleDestSentinel)
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
-
+
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
if (PredToDestList[i].second == MostPopularDest) {
BasicBlock *Pred = PredToDestList[i].first;
-
+
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
@@ -1251,7 +1116,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
if (MostPopularDest == 0)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
-
+
// Ok, try to thread it!
return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}
@@ -1259,15 +1124,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block. See if there are any simplifications we
/// can do based on inputs to the phi node.
-///
+///
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent();
-
+
// TODO: We could make use of this to do it once for blocks with common PHI
// values.
SmallVector<BasicBlock*, 1> PredBBs;
PredBBs.resize(1);
-
+
// If any of the predecessor blocks end in an unconditional branch, we can
// *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a
@@ -1289,21 +1154,21 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
-///
+///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
-
+
// If either the LHS or RHS of the xor is a constant, don't do this
// optimization.
if (isa<ConstantInt>(BO->getOperand(0)) ||
isa<ConstantInt>(BO->getOperand(1)))
return false;
-
+
// If the first instruction in BB isn't a phi, we won't be able to infer
// anything special about any particular predecessor.
if (!isa<PHINode>(BB->front()))
return false;
-
+
// If we have a xor as the branch input to this block, and we know that the
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
// the condition into the predecessor and fix that value to true, saving some
@@ -1322,15 +1187,17 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// %Y = icmp ne i32 %A, %B
// br i1 %Z, ...
- SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
+ PredValueInfoTy XorOpValues;
bool isLHS = true;
- if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
+ WantInteger)) {
assert(XorOpValues.empty());
- if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
+ if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
+ WantInteger))
return false;
isLHS = false;
}
-
+
assert(!XorOpValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1338,29 +1205,33 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
- if (XorOpValues[i].first->isZero())
+ if (isa<UndefValue>(XorOpValues[i].first))
+ // Ignore undefs for the count.
+ continue;
+ if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
++NumFalse;
else
++NumTrue;
}
-
+
// Determine which value to split on, true, false, or undef if neither.
ConstantInt *SplitVal = 0;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
SplitVal = ConstantInt::getFalse(BB->getContext());
-
+
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
+ if (XorOpValues[i].first != SplitVal &&
+ !isa<UndefValue>(XorOpValues[i].first))
+ continue;
BlocksToFoldInto.push_back(XorOpValues[i].second);
}
-
+
// If we inferred a value for all of the predecessors, then duplication won't
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
@@ -1377,10 +1248,10 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// If all preds provide 1, set the computed value to 1.
BO->setOperand(!isLHS, SplitVal);
}
-
+
return true;
}
-
+
// Try to duplicate BB into PredBB.
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}
@@ -1398,14 +1269,14 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
Value *IV = PN->getIncomingValueForBlock(OldPred);
-
+
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
if (I != ValueMap.end())
IV = I->second;
}
-
+
PN->addIncoming(IV, NewPred);
}
}
@@ -1413,8 +1284,8 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB. Transform the IR to reflect this change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &PredBBs,
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
@@ -1422,7 +1293,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - would thread to self!\n");
return false;
}
-
+
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
@@ -1438,7 +1309,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< "' - Cost is too high: " << JumpThreadCost << "\n");
return false;
}
-
+
   // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1449,30 +1320,29 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// And finally, do it!
DEBUG(dbgs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
<< ", across block:\n "
<< *BB << "\n");
-
- if (LVI)
- LVI->threadEdge(PredBB, BB, SuccBB);
-
+
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
DenseMap<Instruction*, Value*> ValueMapping;
-
- BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
- BB->getName()+".thread",
+
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
+ BB->getName()+".thread",
BB->getParent(), BB);
NewBB->moveAfter(PredBB);
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; !isa<TerminatorInst>(BI); ++BI) {
@@ -1480,7 +1350,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setName(BI->getName());
NewBB->getInstList().push_back(New);
ValueMapping[BI] = New;
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1489,15 +1359,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
New->setOperand(i, I->second);
}
}
-
+
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
BranchInst::Create(SuccBB, NewBB);
-
+
// Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
// PHI nodes for NewBB now.
AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1515,14 +1385,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
@@ -1531,28 +1401,28 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
-
+
+
// Ok, NewBB is good to go. Update the terminator of PredBB to jump to
// NewBB instead of BB. This eliminates predecessors from BB, which requires
// us to simplify any PHI nodes in BB.
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- RemovePredecessorAndSimplify(BB, PredBB, TD);
+ BB->removePredecessor(PredBB, true);
PredTerm->setSuccessor(i, NewBB);
}
-
+
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
SimplifyInstructionsInBlock(NewBB, TD);
-
+
// Threaded an edge!
++NumThreads;
return true;
@@ -1576,14 +1446,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
<< "' - it might create an irreducible loop!\n");
return false;
}
-
+
unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
if (DuplicationCost > Threshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
return false;
}
-
+
   // And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -1594,35 +1464,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
-
+
// Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n");
-
+
// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
-
+
if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
-
+
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping;
-
+
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
+
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
-
+
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
@@ -1644,7 +1514,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping[BI] = New;
}
}
-
+
// Check to see if the targets of the branch had PHI nodes. If so, we need to
// add entries to the PHI nodes for branch from PredBB now.
BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
@@ -1652,7 +1522,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
ValueMapping);
AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
ValueMapping);
-
+
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1670,35 +1540,35 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
continue;
} else if (User->getParent() == BB)
continue;
-
+
UsesToRename.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the block, we're done with this instruction.
if (UsesToRename.empty())
continue;
-
+
DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
-
+
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
-
+
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
DEBUG(dbgs() << "\n");
}
-
+
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- RemovePredecessorAndSimplify(BB, PredBB, TD);
-
+ BB->removePredecessor(PredBB, true);
+
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
-
+
++NumDupes;
return true;
}
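A minimal C-level sketch of the duplication performed above (illustrative only; the names cond, A and B are assumptions, not from the patch): BB ends in a conditional branch on a PHI whose incoming value along the edge from PredBB is already known, so cloning BB's body into PredBB lets the cloned branch fold.

    /* before: cond is a phi; its value is known only along the edge from PredBB */
    if (cond) A(); else B();

    /* the copy cloned into PredBB, where cond is known to be true, folds to */
    A();

The original BB stays in place to serve its remaining predecessors.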
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index 2ef8544..0786793 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -36,13 +36,13 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
@@ -66,7 +66,9 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(ID) {}
+ LICM() : LoopPass(ID) {
+ initializeLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -80,7 +82,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
}
@@ -129,42 +131,7 @@ namespace {
///
bool inSubLoop(BasicBlock *BB) {
assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
- for (Loop::iterator I = CurLoop->begin(), E = CurLoop->end(); I != E; ++I)
- if ((*I)->contains(BB))
- return true; // A subloop actually contains this block!
- return false;
- }
-
- /// isExitBlockDominatedByBlockInLoop - This method checks to see if the
- /// specified exit block of the loop is dominated by the specified block
- /// that is in the body of the loop. We use these constraints to
- /// dramatically limit the amount of the dominator tree that needs to be
- /// searched.
- bool isExitBlockDominatedByBlockInLoop(BasicBlock *ExitBlock,
- BasicBlock *BlockInLoop) const {
- // If the block in the loop is the loop header, it must be dominated!
- BasicBlock *LoopHeader = CurLoop->getHeader();
- if (BlockInLoop == LoopHeader)
- return true;
-
- DomTreeNode *BlockInLoopNode = DT->getNode(BlockInLoop);
- DomTreeNode *IDom = DT->getNode(ExitBlock);
-
- // Because the exit block is not in the loop, we know we have to get _at
- // least_ its immediate dominator.
- IDom = IDom->getIDom();
-
- while (IDom && IDom != BlockInLoopNode) {
- // If we have got to the header of the loop, then the instructions block
- // did not dominate the exit node, so we can't hoist it.
- if (IDom->getBlock() == LoopHeader)
- return false;
-
- // Get next Immediate Dominator.
- IDom = IDom->getIDom();
- };
-
- return true;
+ return LI->getLoopFor(BB) != CurLoop;
}
/// sink - When an instruction is found to only be used outside of the loop,
@@ -187,13 +154,13 @@ namespace {
/// pointerInvalidatedByLoop - Return true if the body of this loop may
/// store into the memory location pointed to by V.
///
- bool pointerInvalidatedByLoop(Value *V, unsigned Size) {
+ bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
+ const MDNode *TBAAInfo) {
// Check to see if any of the basic blocks in CurLoop invalidate *V.
- return CurAST->getAliasSetForPointer(V, Size).isMod();
+ return CurAST->getAliasSetForPointer(V, Size, TBAAInfo).isMod();
}
bool canSinkOrHoistInst(Instruction &I);
- bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
void PromoteAliasSet(AliasSet &AS);
@@ -201,7 +168,12 @@ namespace {
}
char LICM::ID = 0;
-INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -369,7 +341,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
+ if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
}
@@ -394,16 +366,17 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
return true;
// Don't hoist loads which have may-aliased stores in loop.
- unsigned Size = 0;
+ uint64_t Size = 0;
if (LI->getType()->isSized())
Size = AA->getTypeStoreSize(LI->getType());
- return !pointerInvalidatedByLoop(LI->getOperand(0), Size);
+ return !pointerInvalidatedByLoop(LI->getOperand(0), Size,
+ LI->getMetadata(LLVMContext::MD_tbaa));
} else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Handle obvious cases efficiently.
AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
return true;
- else if (Behavior == AliasAnalysis::OnlyReadsMemory) {
+ if (AliasAnalysis::onlyReadsMemory(Behavior)) {
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
@@ -452,20 +425,6 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
}
-/// isLoopInvariantInst - Return true if all operands of this instruction are
-/// loop invariant. We also filter out non-hoistable instructions here just for
-/// efficiency.
-///
-bool LICM::isLoopInvariantInst(Instruction &I) {
- // The instruction is loop invariant if all of its operands are loop-invariant
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (!CurLoop->isLoopInvariant(I.getOperand(i)))
- return false;
-
- // If we got this far, the instruction is loop invariant!
- return true;
-}
-
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
@@ -486,7 +445,7 @@ void LICM::sink(Instruction &I) {
// enough that we handle it as a special (more efficient) case. It is more
// efficient to handle because there are no PHI nodes that need to be placed.
if (ExitBlocks.size() == 1) {
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {
+ if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
// Instruction is not used, just delete it.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
@@ -537,7 +496,7 @@ void LICM::sink(Instruction &I) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
- if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ if (!DT->dominates(InstOrigBB, ExitBlock))
continue;
// Insert the code after the last PHI node.
@@ -628,15 +587,61 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getExitBlocks(ExitBlocks);
- // For each exit block, get the DT node and walk up the DT until the
- // instruction's basic block is found or we exit the loop.
+ // Verify that the block dominates each of the exit blocks of the loop.
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[i], Inst.getParent()))
+ if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
return false;
return true;
}
+namespace {
+ class LoopPromoter : public LoadAndStorePromoter {
+ Value *SomePtr; // Designated pointer to store to.
+ SmallPtrSet<Value*, 4> &PointerMustAliases;
+ SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ AliasSetTracker &AST;
+ public:
+ LoopPromoter(Value *SP,
+ const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ SmallPtrSet<Value*, 4> &PMA,
+ SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), AST(ast) {}
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const {
+ Value *Ptr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+ return PointerMustAliases.count(Ptr);
+ }
+
+ virtual void doExtraRewritesBeforeFinalDeletion() const {
+ // Insert stores in the loop exit blocks. Each exit block gets a
+ // store of the live-out values that feed them. Since we've already told
+ // the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = LoopExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
+ }
+
+ virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ // Update alias analysis.
+ AST.copyValue(LI, V);
+ }
+ virtual void instructionDeleted(Instruction *I) const {
+ AST.deleteValue(I);
+ }
+ };
+} // end anon namespace
+
/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
@@ -697,8 +702,11 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (isa<LoadInst>(Use))
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
else if (isa<StoreInst>(Use)) {
+ // Stores *of* the pointer are not interesting, only stores *to* the
+ // pointer.
+ if (Use->getOperand(1) != ASIV)
+ continue;
assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
- if (Use->getOperand(0) == ASIV) return;
} else
return; // Not a load or store.
@@ -718,179 +726,43 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
Changed = true;
++NumPromoted;
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
+ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
+ *CurAST);
- // It wants to know some value of the same type as what we'll be inserting.
- Value *SomeValue;
- if (isa<LoadInst>(LoopUses[0]))
- SomeValue = LoopUses[0];
- else
- SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
- SSA.Initialize(SomeValue->getType(), SomeValue->getName());
-
- // First step: bucket up uses of the pointers by the block they occur in.
- // This is important because we have to handle multiple defs/uses in a block
- // ourselves: SSAUpdater is purely for cross-block references.
- // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
- DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
- UsesByBlock[User->getParent()].push_back(User);
- }
-
- // Okay, now we can iterate over all the blocks in the loop with uses,
- // processing them. Keep track of which loads are loading a live-in value.
- SmallVector<LoadInst*, 32> LiveInLoads;
- DenseMap<Value*, Value*> ReplacedLoads;
-
- for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
- Instruction *User = LoopUses[LoopUse];
- std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
-
- // If this block has already been processed, ignore this repeat use.
- if (BlockUses.empty()) continue;
-
- // Okay, this is the first use in the block. If this block just has a
- // single user in it, we can rewrite it trivially.
- if (BlockUses.size() == 1) {
- // If it is a store, it is a trivial def of the value in the block.
- if (isa<StoreInst>(User)) {
- SSA.AddAvailableValue(User->getParent(),
- cast<StoreInst>(User)->getOperand(0));
- } else {
- // Otherwise it is a load, queue it to rewrite as a live-in load.
- LiveInLoads.push_back(cast<LoadInst>(User));
- }
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, check to see if this block is all loads. If so, we can queue
- // them all as live in loads.
- bool HasStore = false;
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
- if (isa<StoreInst>(BlockUses[i])) {
- HasStore = true;
- break;
- }
- }
-
- if (!HasStore) {
- for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
- LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
- BlockUses.clear();
- continue;
- }
-
- // Otherwise, we have mixed loads and stores (or just a bunch of stores).
- // Since SSAUpdater is purely for cross-block values, we need to determine
- // the order of these instructions in the block. If the first use in the
- // block is a load, then it uses the live in value. The last store defines
- // the live out value. We handle this by doing a linear scan of the block.
- BasicBlock *BB = User->getParent();
- Value *StoredValue = 0;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- // If this is a load from an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(L->getOperand(0))) continue;
-
- // If we haven't seen a store yet, this is a live in use, otherwise
- // use the stored value.
- if (StoredValue) {
- L->replaceAllUsesWith(StoredValue);
- ReplacedLoads[L] = StoredValue;
- } else {
- LiveInLoads.push_back(L);
- }
- continue;
- }
-
- if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- // If this is a store to an unrelated pointer, ignore it.
- if (!PointerMustAliases.count(S->getOperand(1))) continue;
-
- // Remember that this is the active value in the block.
- StoredValue = S->getOperand(0);
- }
- }
-
- // The last stored value that happened is the live-out for the block.
- assert(StoredValue && "Already checked that there is a store in block");
- SSA.AddAvailableValue(BB, StoredValue);
- BlockUses.clear();
- }
-
- // Now that all the intra-loop values are classified, set up the preheader.
- // It gets a load of the pointer we're promoting, and it is the live-out value
- // from the preheader.
- LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
- Preheader->getTerminator());
+ // Set up the preheader to have a definition of the value. It is the live-out
+ // value from the preheader that uses in the loop will use.
+ LoadInst *PreheaderLoad =
+ new LoadInst(SomePtr, SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- // Now that the preheader is good to go, set up the exit blocks. Each exit
- // block gets a store of the live-out values that feed them. Since we've
- // already told the SSA updater about the defs in the loop and the preheader
- // definition, it is all set and we can start using it.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
- Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstNonPHI();
- new StoreInst(LiveInValue, SomePtr, InsertPos);
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ Value *SomeValue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0]))
+ SomeValue = LI;
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand();
+
+ CurAST->copyValue(SomeValue, PreheaderLoad);
}
- // Okay, now we rewrite all loads that use live-in values in the loop,
- // inserting PHI nodes as necessary.
- for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
- LoadInst *ALoad = LiveInLoads[i];
- Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
- ALoad->replaceAllUsesWith(NewVal);
- CurAST->copyValue(ALoad, NewVal);
- ReplacedLoads[ALoad] = NewVal;
- }
+ // Rewrite all the loads in the loop and remember all the definitions from
+ // stores in the loop.
+ Promoter.run(LoopUses);
// If the preheader load is itself a pointer, we need to tell alias analysis
// about the new pointer we created in the preheader block and about any PHI
// nodes that just got inserted.
if (PreheaderLoad->getType()->isPointerTy()) {
- // Copy any value stored to or loaded from a must-alias of the pointer.
- CurAST->copyValue(SomeValue, PreheaderLoad);
-
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(SomeValue, NewPHIs[i]);
- }
-
- // Now that everything is rewritten, delete the old instructions from the body
- // of the loop. They should all be dead now.
- for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
- Instruction *User = LoopUses[i];
-
- // If this is a load that still has uses, then the load must have been added
- // as a live value in the SSAUpdate data structure for a block (e.g. because
- // the loaded value was stored later). In this case, we need to recursively
- // propagate the updates until we get to the real value.
- if (!User->use_empty()) {
- Value *NewVal = ReplacedLoads[User];
- assert(NewVal && "not a replaced load?");
-
- // Propagate down to the ultimate replacee. The intermediately loads
- // could theoretically already have been deleted, so we don't want to
- // dereference the Value*'s.
- DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
- while (RLI != ReplacedLoads.end()) {
- NewVal = RLI->second;
- RLI = ReplacedLoads.find(NewVal);
- }
-
- User->replaceAllUsesWith(NewVal);
- CurAST->copyValue(User, NewVal);
- }
-
- CurAST->deleteValue(User);
- User->eraseFromParent();
+ CurAST->copyValue(PreheaderLoad, NewPHIs[i]);
}
// fwew, we're done!
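A minimal sketch of the promotion that PromoteAliasSet now drives through LoadAndStorePromoter (illustrative C; p, a, n and the single-exit loop are assumptions, not from the patch): loads and stores through a must-aliased pointer become one preheader load, register updates inside the loop, and a store in each exit block.

    /* before: every iteration loads and stores through *p */
    for (i = 0; i < n; ++i) { t = *p; t += a[i]; *p = t; }

    /* after promotion: one load before the loop, one store after it */
    t = *p;
    for (i = 0; i < n; ++i)
      t += a[i];
    *p = t;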
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 543dfc1..6d1d344 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -17,6 +17,7 @@
#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
@@ -28,7 +29,9 @@ namespace {
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(ID) {}
+ LoopDeletion() : LoopPass(ID) {
+ initializeLoopDeletionPass(*PassRegistry::getPassRegistry());
+ }
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -49,14 +52,20 @@ namespace {
AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<DominanceFrontier>();
}
};
}
char LoopDeletion::ID = 0;
-INITIALIZE_PASS(LoopDeletion, "loop-deletion",
- "Delete dead loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false)
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
@@ -183,22 +192,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
DominatorTree& DT = getAnalysis<DominatorTree>();
- DominanceFrontier* DF = getAnalysisIfAvailable<DominanceFrontier>();
- SmallPtrSet<DomTreeNode*, 8> ChildNodes;
+ SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
// Move all of the block's children to be children of the preheader, which
// allows us to remove the domtree entry for the block.
- ChildNodes.insert(DT[*LI]->begin(), DT[*LI]->end());
- for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
+ ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(),
DE = ChildNodes.end(); DI != DE; ++DI) {
DT.changeImmediateDominator(*DI, DT[preheader]);
- if (DF) DF->changeImmediateDominator((*DI)->getBlock(), preheader, &DT);
}
ChildNodes.clear();
DT.eraseNode(*LI);
- if (DF) DF->removeBlock(*LI);
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
new file mode 100644
index 0000000..d7fa149
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -0,0 +1,594 @@
+//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements an idiom recognizer that transforms simple loops into a
+// non-loop form. In cases where this kicks in, it can be a significant
+// performance win.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO List:
+//
+// Future loop memory idioms to recognize:
+// memcmp, memmove, strlen, etc.
+// Future floating point idioms to recognize in -ffast-math mode:
+// fpowi
+// Future integer operation idioms to recognize:
+// ctpop, ctlz, cttz
+//
+// Beware that isel's default lowering for ctpop is highly inefficient for
+// i64 and larger types when i64 is legal and the value has few bits set. It
+// would be good to enhance isel to emit a loop for ctpop in this case.
+//
+// We should enhance the memset/memcpy recognition to handle multiple stores in
+// the loop. This would handle things like:
+// void foo(_Complex float *P)
+// for (i) { __real__(*P) = 0; __imag__(*P) = 0; }
+//
+// This could recognize common matrix multiplies and dot product idioms and
+// replace them with calls to BLAS (if linked in??).
+//
+//===----------------------------------------------------------------------===//
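+
+// Illustrative example (an assumption for exposition, not taken from the
+// original file): given target data and a computable trip count, the simplest
+// idiom handled below is a unit-stride splat store,
+//
+//   for (unsigned i = 0; i != n; ++i)
+//     p[i] = 0;
+//
+// which processLoopStridedStore rewrites as a single call in the preheader:
+//
+//   memset(p, 0, n * sizeof(*p));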
+
+#define DEBUG_TYPE "loop-idiom"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
+STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+
+namespace {
+ class LoopIdiomRecognize : public LoopPass {
+ Loop *CurLoop;
+ const TargetData *TD;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ public:
+ static char ID;
+ explicit LoopIdiomRecognize() : LoopPass(ID) {
+ initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+ bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+ bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
+
+ bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment,
+ Value *SplatValue, Instruction *TheStore,
+ const SCEVAddRecExpr *Ev,
+ const SCEV *BECount);
+ bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount);
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG.
+ ///
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+ };
+}
+
+char LoopIdiomRecognize::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
+ false, false)
+
+Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
+
+/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
+/// and zero out all the operands of this instruction. If any of them become
+/// dead, delete them and the computation tree that feeds them.
+///
+static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+ SmallVector<Instruction*, 32> NowDeadInsts;
+
+ NowDeadInsts.push_back(I);
+
+ // Before we touch this instruction, remove it from SE!
+ do {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+
+ // This instruction is dead, zap it, in stages. Start by removing it from
+ // SCEV.
+ SE.forgetValue(DeadInst);
+
+ for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
+ Value *Op = DeadInst->getOperand(op);
+ DeadInst->setOperand(op, 0);
+
+ // If this operand just became dead, add it to the NowDeadInsts list.
+ if (!Op->use_empty()) continue;
+
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ if (isInstructionTriviallyDead(OpI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+
+ } while (!NowDeadInsts.empty());
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ CurLoop = L;
+
+ // The trip count of the loop must be analyzable.
+ SE = &getAnalysis<ScalarEvolution>();
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount)) return false;
+
+ // If this loop executes exactly one time, then it should be peeled, not
+ // optimized by this pass.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ if (BECst->getValue()->getValue() == 0)
+ return false;
+
+ // We require target data for now.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD == 0) return false;
+
+ DT = &getAnalysis<DominatorTree>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+
+ DEBUG(dbgs() << "loop-idiom Scanning: F["
+ << L->getHeader()->getParent()->getName()
+ << "] Loop %" << L->getHeader()->getName() << "\n");
+
+ bool MadeChange = false;
+ // Scan all the blocks in the loop that are not in subloops.
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI) {
+ // Ignore blocks in subloops.
+ if (LI.getLoopFor(*BI) != CurLoop)
+ continue;
+
+ MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
+ }
+ return MadeChange;
+}
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (!DT->dominates(BB, ExitBlocks[i]))
+ return false;
+
+ bool MadeChange = false;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopStore(SI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the store invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+
+ // Look for memset instructions, which may be optimized to a larger memset.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+ WeakVH InstPtr(I);
+ if (!processLoopMemSet(MSI, BECount)) continue;
+ MadeChange = true;
+
+ // If processing the memset invalidated our iterator, start over from the
+ // top of the block.
+ if (InstPtr == 0)
+ I = BB->begin();
+ continue;
+ }
+ }
+
+ return MadeChange;
+}
+
+
+/// processLoopStore - See if this store can be promoted to a memset or memcpy.
+bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
+ if (SI->isVolatile()) return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that aren't a whole number of bytes in size or that are so
+ // large they overflow an unsigned.
+ uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *StoreEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ unsigned StoreSize = (unsigned)SizeInBits >> 3;
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday; that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (Stride == 0 || StoreSize != Stride->getValue()->getValue())
+ return false;
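+
+ // As an illustration of the stride check just above (an assumption about the
+ // typical case, not taken from the patch): for "p[i] = v" storing a 4-byte
+ // value with a canonical induction variable, SE->getSCEV(StorePtr) is an
+ // affine AddRec such as {%p,+,4}<%loop>, so Stride is the constant 4 and
+ // matches StoreSize.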
+
+ // See if we can optimize just this store in isolation.
+ if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
+ StoredVal, SI, StoreEv, BECount))
+ return true;
+
+ // If the stored value is a strided load in the same loop with the same stride
+ // then this may be transformable into a memcpy. This kicks in for stuff like
+ // for (i) A[i] = B[i];
+ if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
+ if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
+ StoreEv->getOperand(1) == LoadEv->getOperand(1) && !LI->isVolatile())
+ if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
+ return true;
+ }
+ //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
+
+ return false;
+}
+
+/// processLoopMemSet - See if this memset can be promoted to a large memset.
+bool LoopIdiomRecognize::
+processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
+ // We can only handle non-volatile memsets with a constant size.
+ if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
+
+ // If we're not allowed to hack on memset, we fail.
+ if (!TLI->has(LibFunc::memset))
+ return false;
+
+ Value *Pointer = MSI->getDest();
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
+ if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ return false;
+
+ // Reject memsets that are so large that they overflow an unsigned.
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ if ((SizeInBytes >> 32) != 0)
+ return false;
+
+ // Check to see if the stride matches the size of the memset. If so, then we
+ // know that every byte is touched in the loop.
+ const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+
+ // TODO: Could also handle negative stride here someday; that will require the
+ // validity check in mayLoopAccessLocation to be updated though.
+ if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ return false;
+
+ return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
+ MSI->getAlignment(), MSI->getValue(),
+ MSI, Ev, BECount);
+}
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
+ Loop *L, const SCEV *BECount,
+ unsigned StoreSize, AliasAnalysis &AA,
+ Instruction *IgnoredStore) {
+ // Get the location that may be stored across the loop. Since the access is
+ // strided positively through memory, we say that the modified location starts
+ // at the pointer and has infinite size.
+ uint64_t AccessSize = AliasAnalysis::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access size
+ // to be exactly the size of the memset, which is (BECount+1)*StoreSize
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
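+ // Worked example (illustrative): a backedge-taken count of 99 with a
+ // StoreSize of 4 refines AccessSize to (99 + 1) * 4 == 400 bytes from Ptr.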
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. A store to &A[i] of size 100 will always return
+ // MayAlias with a store to &A[100]; we need StoreLoc to be "A" with size 100,
+ // which will then no-alias a store to &A[100].
+ AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
+
+ for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
+ ++BI)
+ for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
+ if (&*I != IgnoredStore &&
+ (AA.getModRefInfo(I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
+
+/// getMemSetPatternValue - If a strided store of the specified value is safe to
+/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
+/// be passed in. Otherwise, return null.
+///
+/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
+/// just replicate their input array and then pass on to memset_pattern16.
+static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (C == 0) return 0;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size-1)))
+ return 0;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (TD.isBigEndian())
+ return 0;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16) return 0;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16) return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16/Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
+}
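+
+// Worked example (illustrative): a 4-byte constant such as 0x01020304 gives
+// Size == 4 bytes and ArraySize == 4, so the function returns a [4 x i32]
+// constant array holding four copies of the value -- exactly the 16 bytes
+// that memset_pattern16 expects.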
+
+
+/// processLoopStridedStore - We see a strided store of some value. If we can
+/// transform this into a memset or memset_pattern in the loop preheader, do so.
+bool LoopIdiomRecognize::
+processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ unsigned StoreAlignment, Value *StoredVal,
+ Instruction *TheStore, const SCEVAddRecExpr *Ev,
+ const SCEV *BECount) {
+
+ // If the stored value is a byte-wise value (like i32 -1), then it may be
+ // turned into a memset of i8 -1, assuming that all the consecutive bytes
+ // are stored. A store of i32 0x01020304 can never be turned into a memset,
+ // but it can be turned into memset_pattern if the target supports it.
+ Value *SplatValue = isBytewiseValue(StoredVal);
+ Constant *PatternValue = 0;
+
+ // If we're allowed to form a memset, and the stored value would be acceptable
+ // for memset, use it.
+ if (SplatValue && TLI->has(LibFunc::memset) &&
+ // Verify that the stored value is loop invariant. If not, we can't
+ // promote the memset.
+ CurLoop->isLoopInvariant(SplatValue)) {
+ // Keep and use SplatValue.
+ PatternValue = 0;
+ } else if (TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ // It looks like we can use PatternValue!
+ SplatValue = 0;
+ } else {
+ // Otherwise, this isn't an idiom we can transform. For example, we can't
+ // do anything with a 3-byte store.
+ return false;
+ }
+
+
+ // Okay, we have a strided store "p[i]" of a splattable value. We can turn
+ // this into a memset in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the aliased location. Check for an alias.
+ if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef,
+ CurLoop, BECount,
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore))
+ return false;
+
+ // Okay, everything looks good, insert the memset.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means they should dominate the
+ // header. Just insert code for it in the preheader.
+ SCEVExpander Expander(*SE);
+
+ unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
+ Value *BasePtr =
+ Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall;
+ if (SplatValue)
+ NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
+ else {
+ Module *M = TheStore->getParent()->getParent()->getParent();
+ Value *MSP = M->getOrInsertFunction("memset_pattern16",
+ Builder.getVoidTy(),
+ Builder.getInt8PtrTy(),
+ Builder.getInt8PtrTy(), IntPtr,
+ (void*)0);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known to be
+ // a constant array of 16 bytes. Plop the value into a mergeable global.
+ GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
+ GlobalValue::InternalLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(16);
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
+ }
+
+ DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"
+ << " from store to: " << *Ev << " at: " << *TheStore << "\n");
+ (void)NewCall;
+
+ // Okay, the memset has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(TheStore, *SE);
+ ++NumMemSet;
+ return true;
+}
+
+/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
+/// same-strided load.
+bool LoopIdiomRecognize::
+processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
+ const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv,
+ const SCEV *BECount) {
+ // If we're not allowed to form memcpy, we fail.
+ if (!TLI->has(LibFunc::memcpy))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy in the loop preheader now if we want. However, this
+ // would be unsafe to do if there is anything else in the loop that may read
+ // or write to the stored location (including the load feeding the stores).
+ // Check for an alias.
+ if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod,
+ CurLoop, BECount, StoreSize,
+ getAnalysis<AliasAnalysis>(), SI))
+ return false;
+
+ // Okay, everything looks good, insert the memcpy.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means they should dominate the
+ // header. Just insert code for it in the preheader.
+ SCEVExpander Expander(*SE);
+
+ Value *LoadBasePtr =
+ Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+ Value *StoreBasePtr =
+ Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
+ Preheader->getTerminator());
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ const Type *IntPtr = TD->getIntPtrType(SI->getContext());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ true /*no unsigned overflow*/);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ true /*no unsigned overflow*/);
+
+ Value *NumBytes =
+ Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+
+ Value *NewCall =
+ Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
+ std::min(SI->getAlignment(), LI->getAlignment()));
+
+ DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+ (void)NewCall;
+
+ // Okay, the memcpy has been formed. Zap the original store and anything that
+ // feeds into it.
+ DeleteDeadInstruction(SI, *SE);
+ ++NumMemCpy;
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
deleted file mode 100644
index a433674..0000000
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ /dev/null
@@ -1,1270 +0,0 @@
-//===- LoopIndexSplit.cpp - Loop Index Splitting Pass ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Loop Index Splitting Pass. This pass handles three
-// kinds of loops.
-//
-// [1] A loop may be eliminated if the body is executed exactly once.
-// For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i == X) {
-// body;
-// }
-// }
-//
-// is transformed to
-//
-// i = X;
-// body;
-//
-// [2] A loop's iteration space may be shrunk if the loop body is executed
-// for a proper sub-range of the loop's iteration space. For example,
-//
-// for (i = 0; i < N; ++i) {
-// if (i > A && i < B) {
-// ...
-// }
-// }
-//
-// is transformed to iterators from A to B, if A > 0 and B < N.
-//
-// [3] A loop may be split if the loop body is dominated by a branch.
-// For example,
-//
-// for (i = LB; i < UB; ++i) { if (i < SV) A; else B; }
-//
-// is transformed into
-//
-// AEV = BSV = SV
-// for (i = LB; i < min(UB, AEV); ++i)
-// A;
-// for (i = max(LB, BSV); i < UB; ++i);
-// B;
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-index-split"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-
-using namespace llvm;
-
-STATISTIC(NumIndexSplit, "Number of loop index split");
-STATISTIC(NumIndexSplitRemoved, "Number of loops eliminated by loop index split");
-STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");
-
-namespace {
-
- class LoopIndexSplit : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(ID) {}
-
- // Index split Loop L. Return true if loop is split.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<ScalarEvolution>();
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
-
- private:
- /// processOneIterationLoop -- Eliminate loop if loop body is executed
- /// only once. For example,
- /// for (i = 0; i < N; ++i) {
- /// if ( i == X) {
- /// ...
- /// }
- /// }
- ///
- bool processOneIterationLoop();
-
- // -- Routines used by updateLoopIterationSpace();
-
- /// updateLoopIterationSpace -- Update loop's iteration space if loop
- /// body is executed for certain IV range only. For example,
- ///
- /// for (i = 0; i < N; ++i) {
- /// if ( i > A && i < B) {
- /// ...
- /// }
- /// }
- /// is transformed to iterators from A to B, if A > 0 and B < N.
- ///
- bool updateLoopIterationSpace();
-
- /// restrictLoopBound - Op dominates loop body. Op compares an IV based value
- /// with a loop invariant value. Update loop's lower and upper bound based on
- /// the loop invariant value.
- bool restrictLoopBound(ICmpInst &Op);
-
- // --- Routines used by splitLoop(). --- /
-
- bool splitLoop();
-
- /// removeBlocks - Remove basic block DeadBB and all blocks dominated by
- /// DeadBB. This routine is used to remove split condition's dead branch,
- /// dominated by DeadBB. LiveBB dominates split conidition's other branch.
- void removeBlocks(BasicBlock *DeadBB, Loop *LP, BasicBlock *LiveBB);
-
- /// moveExitCondition - Move exit condition EC into split condition block.
- void moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC, ICmpInst *SC,
- PHINode *IV, Instruction *IVAdd, Loop *LP,
- unsigned);
-
- /// updatePHINodes - CFG has been changed.
- /// Before
- /// - ExitBB's single predecessor was Latch
- /// - Latch's second successor was Header
- /// Now
- /// - ExitBB's single predecessor was Header
- /// - Latch's one and only successor was Header
- ///
- /// Update ExitBB PHINodes' to reflect this change.
- void updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement, Loop *LP);
-
- // --- Utility routines --- /
-
- /// cleanBlock - A block is considered clean if all non terminal
- /// instructions are either PHINodes or IV based values.
- bool cleanBlock(BasicBlock *BB);
-
- /// IVisLT - If Op is comparing IV based value with an loop invariant and
- /// IV based value is less than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisLT(ICmpInst &Op);
-
- /// IVisLE - If Op is comparing IV based value with an loop invariant and
- /// IV based value is less than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisLE(ICmpInst &Op);
-
- /// IVisGT - If Op is comparing IV based value with an loop invariant and
- /// IV based value is greater than the loop invariant then return the loop
- /// invariant. Otherwise return NULL.
- Value * IVisGT(ICmpInst &Op);
-
- /// IVisGE - If Op is comparing IV based value with an loop invariant and
- /// IV based value is greater than or equal to the loop invariant then
- /// return the loop invariant. Otherwise return NULL.
- Value * IVisGE(ICmpInst &Op);
-
- private:
-
- // Current Loop information.
- Loop *L;
- LPPassManager *LPM;
- LoopInfo *LI;
- DominatorTree *DT;
- DominanceFrontier *DF;
-
- PHINode *IndVar;
- ICmpInst *ExitCondition;
- ICmpInst *SplitCondition;
- Value *IVStartValue;
- Value *IVExitValue;
- Instruction *IVIncrement;
- SmallPtrSet<Value *, 4> IVBasedValues;
- };
-}
-
-char LoopIndexSplit::ID = 0;
-INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
- "Index Split Loops", false, false);
-
-Pass *llvm::createLoopIndexSplitPass() {
- return new LoopIndexSplit();
-}
-
-// Index split Loop L. Return true if loop is split.
-bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
- L = IncomingLoop;
- LPM = &LPM_Ref;
-
- // If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm())
- return false;
-
- // FIXME - Nested loops make dominator info updates tricky.
- if (!L->getSubLoops().empty())
- return false;
-
- DT = &getAnalysis<DominatorTree>();
- LI = &getAnalysis<LoopInfo>();
- DF = &getAnalysis<DominanceFrontier>();
-
- // Initialize loop data.
- IndVar = L->getCanonicalInductionVariable();
- if (!IndVar) return false;
-
- bool P1InLoop = L->contains(IndVar->getIncomingBlock(1));
- IVStartValue = IndVar->getIncomingValue(!P1InLoop);
- IVIncrement = dyn_cast<Instruction>(IndVar->getIncomingValue(P1InLoop));
- if (!IVIncrement) return false;
-
- IVBasedValues.clear();
- IVBasedValues.insert(IndVar);
- IVBasedValues.insert(IVIncrement);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I)
- for(BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end();
- BI != BE; ++BI) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BI))
- if (BO != IVIncrement
- && (BO->getOpcode() == Instruction::Add
- || BO->getOpcode() == Instruction::Sub))
- if (IVBasedValues.count(BO->getOperand(0))
- && L->isLoopInvariant(BO->getOperand(1)))
- IVBasedValues.insert(BO);
- }
-
- // Reject loop if loop exit condition is not suitable.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return false;
- BranchInst *EBR = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!EBR) return false;
- ExitCondition = dyn_cast<ICmpInst>(EBR->getCondition());
- if (!ExitCondition) return false;
- if (ExitingBlock != L->getLoopLatch()) return false;
- IVExitValue = ExitCondition->getOperand(1);
- if (!L->isLoopInvariant(IVExitValue))
- IVExitValue = ExitCondition->getOperand(0);
- if (!L->isLoopInvariant(IVExitValue))
- return false;
- if (!IVBasedValues.count(
- ExitCondition->getOperand(IVExitValue == ExitCondition->getOperand(0))))
- return false;
-
- // If start value is more then exit value where induction variable
- // increments by 1 then we are potentially dealing with an infinite loop.
- // Do not index split this loop.
- if (ConstantInt *SV = dyn_cast<ConstantInt>(IVStartValue))
- if (ConstantInt *EV = dyn_cast<ConstantInt>(IVExitValue))
- if (SV->getSExtValue() > EV->getSExtValue())
- return false;
-
- if (processOneIterationLoop())
- return true;
-
- if (updateLoopIterationSpace())
- return true;
-
- if (splitLoop())
- return true;
-
- return false;
-}
-
-// --- Helper routines ---
-// isUsedOutsideLoop - Returns true iff V is used outside the loop L.
-static bool isUsedOutsideLoop(Value *V, Loop *L) {
- for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (!L->contains(cast<Instruction>(*UI)))
- return true;
- return false;
-}
-
-// Return V+1
-static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
-}
-
-// Return V-1
-static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt,
- LLVMContext &Context) {
- Constant *One = ConstantInt::get(V->getType(), 1, Sign);
- return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
-}
-
-// Return min(V1, V1)
-static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V1, V2, "lsp", InsertPt);
-}
-
-// Return max(V1, V2)
-static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) {
-
- Value *C = new ICmpInst(InsertPt,
- Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
- V1, V2, "lsp");
- return SelectInst::Create(C, V2, V1, "lsp", InsertPt);
-}
-
-/// processOneIterationLoop -- Eliminate loop if loop body is executed
-/// only once. For example,
-/// for (i = 0; i < N; ++i) {
-/// if ( i == X) {
-/// ...
-/// }
-/// }
-///
-bool LoopIndexSplit::processOneIterationLoop() {
- SplitCondition = NULL;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
- if (!SplitCondition) return false;
- if (SplitCondition == ExitCondition) return false;
- if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
- if (BR->getOperand(1) != Latch) return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(0))
- && !IVBasedValues.count(SplitCondition->getOperand(1)))
- return false;
-
- // If IV is used outside the loop then this loop traversal is required.
- // FIXME: Calculate and use last IV value.
- if (isUsedOutsideLoop(IVIncrement, L))
- return false;
-
- // If BR operands are not IV or not loop invariants then skip this loop.
- Value *OPV = SplitCondition->getOperand(0);
- Value *SplitValue = SplitCondition->getOperand(1);
- if (!L->isLoopInvariant(SplitValue))
- std::swap(OPV, SplitValue);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- Instruction *OPI = dyn_cast<Instruction>(OPV);
- if (!OPI)
- return false;
- if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
- return false;
- Value *StartValue = IVStartValue;
- Value *ExitValue = IVExitValue;;
-
- if (OPV != IndVar) {
- // If BR operand is IV based then use this operand to calculate
- // effective conditions for loop body.
- BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
- if (!BOPV)
- return false;
- if (BOPV->getOpcode() != Instruction::Add)
- return false;
- StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
- ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
- }
-
- if (!cleanBlock(Header))
- return false;
-
- if (!cleanBlock(Latch))
- return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Now the current loop L contains a compare instruction that compares the
- // induction variable, IndVar, against a loop invariant, and the entire
- // (i.e. meaningful) loop body is dominated by this compare instruction.
- // In that case, eliminate the loop structure surrounding the loop body.
- // For example,
- // for (int i = start; i < end; ++i) {
- // if ( i == somevalue) {
- // loop_body
- // }
- // }
- // can be transformed into
- // if (somevalue >= start && somevalue < end) {
- // i = somevalue;
- // loop_body
- // }
-
- // Replace index variable with split value in loop body. Loop body is executed
- // only when index variable is equal to split value.
- IndVar->replaceAllUsesWith(SplitValue);
-
- // Replace split condition in header.
- // Transform
- // SplitCondition : icmp eq i32 IndVar, SplitValue
- // into
- // c1 = icmp uge i32 SplitValue, StartValue
- // c2 = icmp ult i32 SplitValue, ExitValue
- // and i32 c1, c2
- Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ?
- ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
- SplitValue, StartValue, "lisplit");
-
- CmpInst::Predicate C2P = ExitCondition->getPredicate();
- BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- if (LatchBR->getOperand(1) != Header)
- C2P = CmpInst::getInversePredicate(C2P);
- Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
- Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);
-
- SplitCondition->replaceAllUsesWith(NSplitCond);
- SplitCondition->eraseFromParent();
-
- // Remove Latch to Header edge.
- BasicBlock *LatchSucc = NULL;
- Header->removePredecessor(Latch);
- for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
- SI != E; ++SI) {
- if (Header != *SI)
- LatchSucc = *SI;
- }
-
- // Clean up latch block.
- Value *LatchBRCond = LatchBR->getCondition();
- LatchBR->setUnconditionalDest(LatchSucc);
- RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
-
- LPM->deleteLoopFromQueue(L);
-
- // Update Dominator Info.
- // Only CFG change done is to remove Latch to Header edge. This
- // does not change dominator tree because Latch did not dominate
- // Header.
- if (DF) {
- DominanceFrontier::iterator HeaderDF = DF->find(Header);
- if (HeaderDF != DF->end())
- DF->removeFromFrontier(HeaderDF, Header);
-
- DominanceFrontier::iterator LatchDF = DF->find(Latch);
- if (LatchDF != DF->end())
- DF->removeFromFrontier(LatchDF, Header);
- }
-
- ++NumIndexSplitRemoved;
- return true;
-}
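
A source-level sketch of the net effect of processOneIterationLoop, using a hypothetical callee body(); the pass itself operates on IR, so this only illustrates the intent under the stated assumptions (SplitValue is loop invariant, the trip range is [0, N)):

extern void body(int);
// Before: the body runs for at most one value of i.
void beforeOneIteration(int N, int X) {
  for (int i = 0; i < N; ++i)
    if (i == X)
      body(i);
}
// After: the loop is gone; a range check guards the single execution.
void afterOneIteration(int N, int X) {
  if (X >= 0 && X < N)   // SplitValue >= StartValue && SplitValue < ExitValue
    body(X);             // index variable replaced by the split value
}
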
-
-/// restrictLoopBound - Op dominates loop body. Op compares an IV based value
-/// with a loop invariant value. Update loop's lower and upper bound based on
-/// the loop invariant value.
-bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {
- bool Sign = Op.isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- BranchInst *EBR =
- cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- LLVMContext &Context = Op.getContext();
-
- // New upper and lower bounds.
- Value *NLB = NULL;
- Value *NUB = NULL;
- if (Value *V = IVisLT(Op)) {
- // Restrict upper bound.
- if (IVisLE(*ExitCondition))
- V = getMinusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisLE(Op)) {
- // Restrict upper bound.
- if (IVisLT(*ExitCondition))
- V = getPlusOne(V, Sign, PHTerm, Context);
- NUB = getMin(V, IVExitValue, Sign, PHTerm);
- } else if (Value *V = IVisGT(Op)) {
- // Restrict lower bound.
- V = getPlusOne(V, Sign, PHTerm, Context);
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
- } else if (Value *V = IVisGE(Op))
- // Restrict lower bound.
- NLB = getMax(V, IVStartValue, Sign, PHTerm);
-
- if (!NLB && !NUB)
- return false;
-
- if (NLB) {
- unsigned i = IndVar->getBasicBlockIndex(L->getLoopPreheader());
- IndVar->setIncomingValue(i, NLB);
- }
-
- if (NUB) {
- unsigned i = (ExitCondition->getOperand(0) != IVExitValue);
- ExitCondition->setOperand(i, NUB);
- }
- return true;
-}
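
A minimal sketch (hypothetical standalone helper, signed case only) of the bound arithmetic restrictLoopBound performs for an "IV <= B" guard under an "IV < E" exit: the new upper bound is min(B + 1, E), i.e. getPlusOne followed by getMin.

// Returns the tightened upper bound; B comes from the guard, E is the exit value.
int clampUpperBound(int B, int E) {
  int V = B + 1;            // getPlusOne: rewrite "<= B" as "< B + 1"
  return V < E ? V : E;     // getMin: never loosen past the original exit value
}
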
-
-/// updateLoopIterationSpace -- Update loop's iteration space if loop
-/// body is executed for certain IV range only. For example,
-///
-/// for (i = 0; i < N; ++i) {
-/// if ( i > A && i < B) {
-/// ...
-/// }
-/// }
-/// the loop is transformed to iterate from A to B, if A > 0 and B < N.
-///
-bool LoopIndexSplit::updateLoopIterationSpace() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Header = L->getHeader();
- BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
- if (!BR) return false;
- if (!isa<BranchInst>(Latch->getTerminator())) return false;
- if (BR->isUnconditional()) return false;
- BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
- if (!AND) return false;
- if (AND->getOpcode() != Instruction::And) return false;
- ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
- ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
- if (!Op0 || !Op1)
- return false;
- IVBasedValues.insert(AND);
- IVBasedValues.insert(Op0);
- IVBasedValues.insert(Op1);
- if (!cleanBlock(Header)) return false;
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- // If the merge point for BR is not loop latch then skip this loop.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- return false;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- return false;
- }
-
- // Verify that the loop exiting block has only two predecessors, one of which
- // is the split condition block. The other predecessor will become the exiting
- // block's dominator after the CFG is updated. TODO: Handle CFGs where the
- // exiting block has more than two predecessors. This requires extra work in
- // updating dominator information.
- BasicBlock *ExitingBBPred = NULL;
- for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
- PI != PE; ++PI) {
- BasicBlock *BB = *PI;
- if (Header == BB)
- continue;
- if (ExitingBBPred)
- return false;
- else
- ExitingBBPred = BB;
- }
-
- if (!restrictLoopBound(*Op0))
- return false;
-
- if (!restrictLoopBound(*Op1))
- return false;
-
- // Update CFG.
- if (BR->getSuccessor(0) == ExitingBlock)
- BR->setUnconditionalDest(BR->getSuccessor(1));
- else
- BR->setUnconditionalDest(BR->getSuccessor(0));
-
- AND->eraseFromParent();
- if (Op0->use_empty())
- Op0->eraseFromParent();
- if (Op1->use_empty())
- Op1->eraseFromParent();
-
- // Update dominator info. Now, ExitingBlock has only one predecessor,
- // ExitingBBPred, and it is ExitingBlock's immediate dominator.
- DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);
-
- BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
- if (L->contains(ExitBlock))
- ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);
-
- // If ExitingBlock is a member of the loop basic blocks' DF list then
- // replace ExitingBlock with header and exit block in the DF list
- DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- if (BB == Header || BB == ExitingBlock)
- continue;
- DominanceFrontier::iterator BBDF = DF->find(BB);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (DFBB == ExitingBlock) {
- BBDF->second.erase(DFBB);
- for (DominanceFrontier::DomSetType::iterator
- EBI = ExitingBlockDF->second.begin(),
- EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI)
- BBDF->second.insert(*EBI);
- }
- }
- }
- ++NumRestrictBounds;
- return true;
-}
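
A source-level sketch of the iteration-space update described above, with a hypothetical callee work(); it assumes A and B are loop invariant so the guard can be folded into the bounds.

extern void work(int);
// Before: every iteration tests the guard.
void beforeSpaceUpdate(int N, int A, int B) {
  for (int i = 0; i < N; ++i)
    if (i > A && i < B)
      work(i);
}
// After: the guard becomes the loop bounds, clamped to the original range.
void afterSpaceUpdate(int N, int A, int B) {
  int Lo = (A + 1 > 0) ? A + 1 : 0;   // max(A + 1, StartValue)
  int Hi = (B < N) ? B : N;           // min(B, ExitValue)
  for (int i = Lo; i < Hi; ++i)
    work(i);
}
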
-
-/// removeBlocks - Remove basic block DeadBB and all blocks dominated by DeadBB.
-/// This routine is used to remove split condition's dead branch, dominated by
-/// DeadBB. LiveBB dominates the split condition's other branch.
-void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
- BasicBlock *LiveBB) {
-
- // First update DeadBB's dominance frontier.
- SmallVector<BasicBlock *, 8> FrontierBBs;
- DominanceFrontier::iterator DeadBBDF = DF->find(DeadBB);
- if (DeadBBDF != DF->end()) {
- SmallVector<BasicBlock *, 8> PredBlocks;
-
- DominanceFrontier::DomSetType DeadBBSet = DeadBBDF->second;
- for (DominanceFrontier::DomSetType::iterator DeadBBSetI = DeadBBSet.begin(),
- DeadBBSetE = DeadBBSet.end(); DeadBBSetI != DeadBBSetE; ++DeadBBSetI)
- {
- BasicBlock *FrontierBB = *DeadBBSetI;
- FrontierBBs.push_back(FrontierBB);
-
- // Remove any PHI incoming edge from blocks dominated by DeadBB.
- PredBlocks.clear();
- for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (DT->dominates(DeadBB, P))
- PredBlocks.push_back(P);
- }
-
- for(BasicBlock::iterator FBI = FrontierBB->begin(), FBE = FrontierBB->end();
- FBI != FBE; ++FBI) {
- if (PHINode *PN = dyn_cast<PHINode>(FBI)) {
- for(SmallVector<BasicBlock *, 8>::iterator PI = PredBlocks.begin(),
- PE = PredBlocks.end(); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- PN->removeIncomingValue(P);
- }
- }
- else
- break;
- }
- }
- }
-
- // Now remove DeadBB and all nodes dominated by DeadBB in df order.
- SmallVector<BasicBlock *, 32> WorkList;
- DomTreeNode *DN = DT->getNode(DeadBB);
- for (df_iterator<DomTreeNode*> DI = df_begin(DN),
- E = df_end(DN); DI != E; ++DI) {
- BasicBlock *BB = DI->getBlock();
- WorkList.push_back(BB);
- BB->replaceAllUsesWith(UndefValue::get(
- Type::getLabelTy(DeadBB->getContext())));
- }
-
- while (!WorkList.empty()) {
- BasicBlock *BB = WorkList.pop_back_val();
- LPM->deleteSimpleAnalysisValue(BB, LP);
- for(BasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
- BBI != BBE; ) {
- Instruction *I = BBI;
- ++BBI;
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- LPM->deleteSimpleAnalysisValue(I, LP);
- I->eraseFromParent();
- }
- DT->eraseNode(BB);
- DF->removeBlock(BB);
- LI->removeBlock(BB);
- BB->eraseFromParent();
- }
-
- // Update Frontier BBs' dominator info.
- while (!FrontierBBs.empty()) {
- BasicBlock *FBB = FrontierBBs.pop_back_val();
- BasicBlock *NewDominator = FBB->getSinglePredecessor();
- if (!NewDominator) {
- pred_iterator PI = pred_begin(FBB), PE = pred_end(FBB);
- NewDominator = *PI;
- ++PI;
- if (NewDominator != LiveBB) {
- for(; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (P == LiveBB) {
- NewDominator = LiveBB;
- break;
- }
- NewDominator = DT->findNearestCommonDominator(NewDominator, P);
- }
- }
- }
- assert (NewDominator && "Unable to fix dominator info.");
- DT->changeImmediateDominator(FBB, NewDominator);
- DF->changeImmediateDominator(FBB, NewDominator, DT);
- }
-
-}
-
-// moveExitCondition - Move exit condition EC into split condition block CondBB.
-void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
- BasicBlock *ExitBB, ICmpInst *EC,
- ICmpInst *SC, PHINode *IV,
- Instruction *IVAdd, Loop *LP,
- unsigned ExitValueNum) {
-
- BasicBlock *ExitingBB = EC->getParent();
- Instruction *CurrentBR = CondBB->getTerminator();
-
- // Move exit condition into split condition block.
- EC->moveBefore(CurrentBR);
- EC->setOperand(ExitValueNum == 0 ? 1 : 0, IV);
-
- // Move exiting block's branch into split condition block. Update its branch
- // destination.
- BranchInst *ExitingBR = cast<BranchInst>(ExitingBB->getTerminator());
- ExitingBR->moveBefore(CurrentBR);
- BasicBlock *OrigDestBB = NULL;
- if (ExitingBR->getSuccessor(0) == ExitBB) {
- OrigDestBB = ExitingBR->getSuccessor(1);
- ExitingBR->setSuccessor(1, ActiveBB);
- }
- else {
- OrigDestBB = ExitingBR->getSuccessor(0);
- ExitingBR->setSuccessor(0, ActiveBB);
- }
-
- // Remove split condition and current split condition branch.
- SC->eraseFromParent();
- CurrentBR->eraseFromParent();
-
- // Connect exiting block to original destination.
- BranchInst::Create(OrigDestBB, ExitingBB);
-
- // Update PHINodes
- updatePHINodes(ExitBB, ExitingBB, CondBB, IV, IVAdd, LP);
-
- // Fix dominator info.
- // ExitBB is now dominated by CondBB
- DT->changeImmediateDominator(ExitBB, CondBB);
- DF->changeImmediateDominator(ExitBB, CondBB, DT);
-
- // Blocks outside the loop may have been in the dominance frontier of blocks
- // inside the condition; this is now impossible because the blocks inside the
- // condition no longer dominate the exit. Remove the relevant blocks from
- // the dominance frontiers.
- for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
- I != E; ++I) {
- if (!DT->properlyDominates(CondBB, *I)) continue;
- DominanceFrontier::iterator BBDF = DF->find(*I);
- DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
- DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
- while (DomSetI != DomSetE) {
- DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
- ++DomSetI;
- BasicBlock *DFBB = *CurrentItr;
- if (!LP->contains(DFBB))
- BBDF->second.erase(DFBB);
- }
- }
-}
-
-/// updatePHINodes - CFG has been changed.
-/// Before
-/// - ExitBB's single predecessor was Latch
-/// - Latch's second successor was Header
-/// Now
-/// - ExitBB's single predecessor is Header
-/// - Latch's one and only successor is Header
-///
-/// Update ExitBB's PHINodes to reflect this change.
-void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch,
- BasicBlock *Header,
- PHINode *IV, Instruction *IVIncrement,
- Loop *LP) {
-
- for (BasicBlock::iterator BI = ExitBB->begin(), BE = ExitBB->end();
- BI != BE; ) {
- PHINode *PN = dyn_cast<PHINode>(BI);
- ++BI;
- if (!PN)
- break;
-
- Value *V = PN->getIncomingValueForBlock(Latch);
- if (PHINode *PHV = dyn_cast<PHINode>(V)) {
- // PHV is in Latch. PHV has one use in the ExitBB PHINode and one use
- // in Header, which is the new incoming value for PN.
- Value *NewV = NULL;
- for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end();
- UI != E; ++UI)
- if (PHINode *U = dyn_cast<PHINode>(*UI))
- if (LP->contains(U)) {
- NewV = U;
- break;
- }
-
- // Add incoming value from header only if PN has any use inside the loop.
- if (NewV)
- PN->addIncoming(NewV, Header);
-
- } else if (Instruction *PHI = dyn_cast<Instruction>(V)) {
- // If this instruction is IVIncrement then IV is the new incoming value
- // from the header; otherwise this instruction itself must be the incoming
- // value from the header because the loop is in LCSSA form.
- if (PHI == IVIncrement)
- PN->addIncoming(IV, Header);
- else
- PN->addIncoming(V, Header);
- } else
- // Otherwise this is an incoming value from header because loop is in
- // LCSSA form.
- PN->addIncoming(V, Header);
-
- // Remove incoming value from Latch.
- PN->removeIncomingValue(Latch);
- }
-}
-
-bool LoopIndexSplit::splitLoop() {
- SplitCondition = NULL;
- if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
- || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
- return false;
- BasicBlock *Header = L->getHeader();
- BasicBlock *Latch = L->getLoopLatch();
- BranchInst *SBR = NULL; // Split Condition Branch
- BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
- // If Exiting block includes loop variant instructions then this
- // loop may not be split safely.
- BasicBlock *ExitingBlock = ExitCondition->getParent();
- if (!cleanBlock(ExitingBlock)) return false;
-
- LLVMContext &Context = Header->getContext();
-
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
- if (!BR || BR->isUnconditional()) continue;
- ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
- if (!CI || CI == ExitCondition
- || CI->getPredicate() == ICmpInst::ICMP_NE
- || CI->getPredicate() == ICmpInst::ICMP_EQ)
- continue;
-
- // Unable to handle triangle loops at the moment.
- // In a triangle loop, the split condition is in the header and one of
- // the split destinations is the loop latch. If the split condition is EQ
- // then such loops are already handled in processOneIterationLoop().
- if (Header == (*I)
- && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
- continue;
-
- // If the block does not dominate the latch then this is not a diamond.
- // Such a loop may not benefit from index splitting.
- if (!DT->dominates((*I), Latch))
- continue;
-
- // If the split condition's branch targets do not have a single predecessor,
- // SplitCondBlock, then it is not possible to remove the inactive branch.
- if (!BR->getSuccessor(0)->getSinglePredecessor()
- || !BR->getSuccessor(1)->getSinglePredecessor())
- return false;
-
- // If the merge point for BR is not loop latch then skip this condition.
- if (BR->getSuccessor(0) != Latch) {
- DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
- assert (DF0 != DF->end() && "Unable to find dominance frontier");
- if (!DF0->second.count(Latch))
- continue;
- }
-
- if (BR->getSuccessor(1) != Latch) {
- DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
- assert (DF1 != DF->end() && "Unable to find dominance frontier");
- if (!DF1->second.count(Latch))
- continue;
- }
- SplitCondition = CI;
- SBR = BR;
- break;
- }
-
- if (!SplitCondition)
- return false;
-
- // If the predicate sign does not match then skip.
- if (ExitCondition->isSigned() != SplitCondition->isSigned())
- return false;
-
- unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
- unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
- Value *SplitValue = SplitCondition->getOperand(SVOpNum);
- if (!L->isLoopInvariant(SplitValue))
- return false;
- if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
- return false;
-
- // Check for side effects.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
-
- assert(DT->dominates(Header, BB));
- if (DT->properlyDominates(SplitCondition->getParent(), BB))
- continue;
-
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *Inst = BI;
-
- if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
- && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
- return false;
- }
- }
-
- // Normalize loop conditions so that it is easier to calculate new loop
- // bounds.
- if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
- ExitCondition->setPredicate(ExitCondition->getInversePredicate());
- BasicBlock *T = EBR->getSuccessor(0);
- EBR->setSuccessor(0, EBR->getSuccessor(1));
- EBR->setSuccessor(1, T);
- }
-
- if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
- SplitCondition->setPredicate(SplitCondition->getInversePredicate());
- BasicBlock *T = SBR->getSuccessor(0);
- SBR->setSuccessor(0, SBR->getSuccessor(1));
- SBR->setSuccessor(1, T);
- }
-
- //[*] Calculate new loop bounds.
- Value *AEV = SplitValue;
- Value *BSV = SplitValue;
- bool Sign = SplitCondition->isSigned();
- Instruction *PHTerm = L->getLoopPreheader()->getTerminator();
-
- if (IVisLT(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- /* Do nothing */
- }
- else if (IVisLE(*SplitCondition)) {
- AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- }
- else if (IVisLE(*ExitCondition)) {
- if (IVisLT(*SplitCondition)) {
- AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
- }
- else if (IVisLE(*SplitCondition)) {
- BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
- } else {
- assert (0 && "Unexpected split condition!");
- }
- } else {
- assert (0 && "Unexpected exit condition!");
- }
- AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
- BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
-
- // [*] Clone Loop
- ValueMap<const Value *, Value *> VMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
- Loop *ALoop = L;
-
- // [*] ALoop's exiting edge enters BLoop's header.
- // ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
- BasicBlock *A_ExitingBlock = ExitCondition->getParent();
- BranchInst *A_ExitInsn =
- dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
- assert (A_ExitInsn && "Unable to find suitable loop exit branch");
- BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
- BasicBlock *B_Header = BLoop->getHeader();
- if (ALoop->contains(B_ExitBlock)) {
- B_ExitBlock = A_ExitInsn->getSuccessor(0);
- A_ExitInsn->setSuccessor(0, B_Header);
- } else
- A_ExitInsn->setSuccessor(1, B_Header);
-
- // [*] Update ALoop's exit value using new exit value.
- ExitCondition->setOperand(EVOpNum, AEV);
-
- // [*] Update BLoop's header phi nodes. Remove incoming PHINode values from
- // the original loop's preheader. Add incoming PHINode values from
- // ALoop's exiting block. Update BLoop header's dominator info.
-
- // Collect inverse map of Header PHINodes.
- DenseMap<Value *, Value *> InverseMap;
- for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
- BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(VMap[PN]);
- InverseMap[PNClone] = PN;
- } else
- break;
- }
-
- BasicBlock *A_Preheader = ALoop->getLoopPreheader();
- for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- // Remove incoming value from original preheader.
- PN->removeIncomingValue(A_Preheader);
-
- // Add incoming value from A_ExitingBlock.
- if (PN == B_IndVar)
- PN->addIncoming(BSV, A_ExitingBlock);
- else {
- PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
- Value *V2 = NULL;
- // If loop header is also loop exiting block then
- // OrigPN is incoming value for B loop header.
- if (A_ExitingBlock == ALoop->getHeader())
- V2 = OrigPN;
- else
- V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
- PN->addIncoming(V2, A_ExitingBlock);
- }
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_Header, A_ExitingBlock);
- DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
-
- // [*] Update BLoop's exit block. Its new predecessor is BLoop's exiting
- // block. Remove incoming PHINode values from ALoop's exiting block and
- // add new incoming values from BLoop's exiting block.
- // Update BLoop exit block's dominator info.
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
- for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
- B_ExitingBlock);
- PN->removeIncomingValue(A_ExitingBlock);
- } else
- break;
- }
-
- DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
- DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);
-
- //[*] Split ALoop's exit edge. This creates a new block which
- // serves two purposes. The first is to hold PHINode definitions
- // that preserve ALoop's LCSSA form. The second is to act
- // as a preheader for BLoop.
- BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);
-
- //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
- // in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
- for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
- BI != BE; ++BI) {
- if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
- PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
- newPHI->addIncoming(V1, A_ExitingBlock);
- A_ExitBlock->getInstList().push_front(newPHI);
- PN->removeIncomingValue(A_ExitBlock);
- PN->addIncoming(newPHI, A_ExitBlock);
- } else
- break;
- }
-
- //[*] Eliminate split condition's inactive branch from ALoop.
- BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
- BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
- BasicBlock *A_InactiveBranch = NULL;
- BasicBlock *A_ActiveBranch = NULL;
- A_ActiveBranch = A_BR->getSuccessor(0);
- A_InactiveBranch = A_BR->getSuccessor(1);
- A_BR->setUnconditionalDest(A_ActiveBranch);
- removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
-
- //[*] Eliminate split condition's inactive branch from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
- BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
- BasicBlock *B_InactiveBranch = NULL;
- BasicBlock *B_ActiveBranch = NULL;
- B_ActiveBranch = B_BR->getSuccessor(1);
- B_InactiveBranch = B_BR->getSuccessor(0);
- B_BR->setUnconditionalDest(B_ActiveBranch);
- removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);
-
- BasicBlock *A_Header = ALoop->getHeader();
- if (A_ExitingBlock == A_Header)
- return true;
-
- //[*] Move exit condition into split condition block to avoid
- // executing dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
-
- moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
- cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
- ALoop, EVOpNum);
-
- moveExitCondition(B_SplitCondBlock, B_ActiveBranch,
- B_ExitBlock, B_ExitCondition,
- B_SplitCondition, B_IndVar, B_IndVarIncrement,
- BLoop, EVOpNum);
-
- ++NumIndexSplit;
- return true;
-}
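
A source-level sketch of the split that splitLoop performs, with hypothetical callees f() and g(); SV stands for the loop-invariant SplitValue, and both conditions are assumed to already be in "<" form after normalization.

extern void f(int), g(int);
// Before: one loop whose body branches on an IV-based, loop-invariant test.
void beforeIndexSplit(int Start, int End, int SV) {
  for (int i = Start; i < End; ++i) {
    if (i < SV) f(i);
    else        g(i);
  }
}
// After: two loops, each running only one side of the branch.
void afterIndexSplit(int Start, int End, int SV) {
  int AEV = (SV < End) ? SV : End;       // ALoop exit value: min(SV, End)
  int BSV = (SV > Start) ? SV : Start;   // BLoop start value: max(SV, Start)
  for (int i = Start; i < AEV; ++i) f(i);   // ALoop keeps the active branch
  for (int i = BSV; i < End; ++i)   g(i);   // BLoop keeps the other branch
}
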
-
-/// cleanBlock - A block is considered clean if all non-terminator instructions
-/// are PHINodes, IV based values, or otherwise free of side effects and used
-/// only within the block.
-bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
- Instruction *Terminator = BB->getTerminator();
- for(BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- Instruction *I = BI;
-
- if (isa<PHINode>(I) || I == Terminator || I == ExitCondition
- || I == SplitCondition || IVBasedValues.count(I)
- || isa<DbgInfoIntrinsic>(I))
- continue;
-
- if (I->mayHaveSideEffects())
- return false;
-
- // If I is used only inside this block then it is OK.
- bool usedOutsideBB = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(*UI);
- if (U->getParent() != BB)
- usedOutsideBB = true;
- }
- if (!usedOutsideBB)
- continue;
-
- // Otherwise we have an instruction that may not allow loop splitting.
- return false;
- }
- return true;
-}
-
-/// IVisLT - If Op compares an IV based value with a loop invariant and the
-/// IV based value is less than the loop invariant then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisLE - If Op compares an IV based value with a loop invariant and the
-/// IV based value is less than or equal to the loop invariant then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisLE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGT - If Op compares an IV based value with a loop invariant and the
-/// IV based value is greater than the loop invariant then return the loop
-/// invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGT(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGT || P == ICmpInst::ICMP_UGT)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLT || P == ICmpInst::ICMP_ULT)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
-/// IVisGE - If Op compares an IV based value with a loop invariant and the
-/// IV based value is greater than or equal to the loop invariant then
-/// return the loop invariant. Otherwise return NULL.
-Value * LoopIndexSplit::IVisGE(ICmpInst &Op) {
- ICmpInst::Predicate P = Op.getPredicate();
- if ((P == ICmpInst::ICMP_SGE || P == ICmpInst::ICMP_UGE)
- && IVBasedValues.count(Op.getOperand(0))
- && L->isLoopInvariant(Op.getOperand(1)))
- return Op.getOperand(1);
-
- if ((P == ICmpInst::ICMP_SLE || P == ICmpInst::ICMP_ULE)
- && IVBasedValues.count(Op.getOperand(1))
- && L->isLoopInvariant(Op.getOperand(0)))
- return Op.getOperand(0);
-
- return NULL;
-}
-
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
new file mode 100644
index 0000000..af25c5c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -0,0 +1,170 @@
+//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs lightweight instruction simplification on loop bodies.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-instsimplify"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions simplified");
+
+namespace {
+ class LoopInstSimplify : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopInstSimplify() : LoopPass(ID) {
+ initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop*, LPPassManager&);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreservedID(LCSSAID);
+ AU.addPreserved("scalar-evolution");
+ }
+ };
+}
+
+char LoopInstSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
+ "Simplify instructions in loops", false, false)
+
+Pass *llvm::createLoopInstSimplifyPass() {
+ return new LoopInstSimplify();
+}
+
+bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+ array_pod_sort(ExitBlocks.begin(), ExitBlocks.end());
+
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+
+ // The bit we are stealing from the pointer represents whether this basic
+ // block is the header of a subloop, in which case we only process its phis.
+ typedef PointerIntPair<BasicBlock*, 1> WorklistItem;
+ SmallVector<WorklistItem, 16> VisitStack;
+ SmallPtrSet<BasicBlock*, 32> Visited;
+
+ bool Changed = false;
+ bool LocalChanged;
+ do {
+ LocalChanged = false;
+
+ VisitStack.clear();
+ Visited.clear();
+
+ VisitStack.push_back(WorklistItem(L->getHeader(), false));
+
+ while (!VisitStack.empty()) {
+ WorklistItem Item = VisitStack.pop_back_val();
+ BasicBlock *BB = Item.getPointer();
+ bool IsSubloopHeader = Item.getInt();
+
+ // Simplify instructions in the current basic block.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = BI++;
+
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+
+ // Don't bother simplifying unused instructions.
+ if (!I->use_empty()) {
+ Value *V = SimplifyInstruction(I, TD, DT);
+ if (V && LI->replacementPreservesLCSSAForm(I, V)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+
+ I->replaceAllUsesWith(V);
+ LocalChanged = true;
+ ++NumSimplified;
+ }
+ }
+ LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I);
+
+ if (IsSubloopHeader && !isa<PHINode>(I))
+ break;
+ }
+
+ // Add all successors to the worklist, except for loop exit blocks and the
+ // bodies of subloops. We visit the headers of loops so that we can process
+ // their phis, but we contract the rest of the subloop body and only follow
+ // edges leading back to the original loop.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ BasicBlock *SuccBB = *SI;
+ if (!Visited.insert(SuccBB))
+ continue;
+
+ const Loop *SuccLoop = LI->getLoopFor(SuccBB);
+ if (SuccLoop && SuccLoop->getHeader() == SuccBB
+ && L->contains(SuccLoop)) {
+ VisitStack.push_back(WorklistItem(SuccBB, true));
+
+ SmallVector<BasicBlock*, 8> SubLoopExitBlocks;
+ SuccLoop->getExitBlocks(SubLoopExitBlocks);
+
+ for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) {
+ BasicBlock *ExitBB = SubLoopExitBlocks[i];
+ if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB))
+ VisitStack.push_back(WorklistItem(ExitBB, false));
+ }
+
+ continue;
+ }
+
+ bool IsExitBlock = std::binary_search(ExitBlocks.begin(),
+ ExitBlocks.end(), SuccBB);
+ if (IsExitBlock)
+ continue;
+
+ VisitStack.push_back(WorklistItem(SuccBB, false));
+ }
+ }
+
+ // Place the list of instructions to simplify on the next loop iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+
+ Changed |= LocalChanged;
+ } while (LocalChanged);
+
+ return Changed;
+}
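
A minimal sketch of scheduling the new pass with the legacy PassManager of this LLVM version; only createLoopInstSimplifyPass comes from this patch, the surrounding setup is assumed.

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"
// Adds loop-instsimplify to an existing pipeline; the pass manager wraps the
// LoopPass in a loop pass manager automatically.
void addLoopInstSimplify(llvm::PassManager &PM) {
  PM.add(llvm::createLoopInstSimplifyPass());
}
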
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 65acc1d..95e1578 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,16 +15,16 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#define MAX_HEADER_SIZE 16
@@ -35,16 +35,13 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {}
-
- // Rotate Loop L as many times as possible. Return true if
- // loop is rotated at least once.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ LoopRotate() : LoopPass(ID) {
+ initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ }
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -54,79 +51,119 @@ namespace {
AU.addPreserved<ScalarEvolution>();
}
- // Helper functions
-
- /// Do actual work
- bool rotateLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool rotateLoop(Loop *L);
- /// Initialize local data
- void initialize();
-
- /// After loop rotation, loop pre-header has multiple successors.
- /// Insert one forwarding basic block to ensure that loop pre-header
- /// has only one successor.
- void preserveCanonicalLoopForm(LPPassManager &LPM);
-
private:
- Loop *L;
- BasicBlock *OrigHeader;
- BasicBlock *OrigPreHeader;
- BasicBlock *OrigLatch;
- BasicBlock *NewHeader;
- BasicBlock *Exit;
- LPPassManager *LPM_Ptr;
+ LoopInfo *LI;
};
}
char LoopRotate::ID = 0;
-INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
-bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
-
- bool RotatedOneLoop = false;
- initialize();
- LPM_Ptr = &LPM;
+bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ LI = &getAnalysis<LoopInfo>();
// One loop can be rotated multiple times.
- while (rotateLoop(Lp,LPM)) {
- RotatedOneLoop = true;
- initialize();
- }
+ bool MadeChange = false;
+ while (rotateLoop(L))
+ MadeChange = true;
- return RotatedOneLoop;
+ return MadeChange;
}
-/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
- L = Lp;
-
- OrigPreHeader = L->getLoopPreheader();
- if (!OrigPreHeader) return false;
-
- OrigLatch = L->getLoopLatch();
- if (!OrigLatch) return false;
+/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
+/// old header into the preheader. If there were uses of the values produced by
+/// these instruction that were outside of the loop, we have to insert PHI nodes
+/// to merge the two values. Do this now.
+static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
+ BasicBlock *OrigPreheader,
+ ValueToValueMapTy &ValueMap) {
+ // Remove PHI node entries that are no longer live.
+ BasicBlock::iterator I, E = OrigHeader->end();
+ for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+
+ // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
+ // as necessary.
+ SSAUpdater SSA;
+ for (I = OrigHeader->begin(); I != E; ++I) {
+ Value *OrigHeaderVal = I;
+
+ // If there are no uses of the value (e.g. because it returns void), there
+ // is nothing to rewrite.
+ if (OrigHeaderVal->use_empty())
+ continue;
+
+ Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
- OrigHeader = L->getHeader();
+ // The value now exists in two versions: the initial value in the preheader
+ // and the loop "next" value in the original header.
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
+ SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
+
+ // Visit each use of the OrigHeader instruction.
+ for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
+ UE = OrigHeaderVal->use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+
+ // SSAUpdater can't handle a non-PHI use in the same block as an
+ // earlier def. We can easily handle those cases manually.
+ Instruction *UserInst = cast<Instruction>(U.getUser());
+ if (!isa<PHINode>(UserInst)) {
+ BasicBlock *UserBB = UserInst->getParent();
+
+ // The original users in the OrigHeader are already using the
+ // original definitions.
+ if (UserBB == OrigHeader)
+ continue;
+
+ // Users in the OrigPreHeader need to use the value to which the
+ // original definitions are mapped.
+ if (UserBB == OrigPreheader) {
+ U = OrigPreHeaderVal;
+ continue;
+ }
+ }
+
+ // Anything else can be handled by SSAUpdater.
+ SSA.RewriteUse(U);
+ }
+ }
+}
+/// Rotate loop LP. Return true if the loop is rotated.
+bool LoopRotate::rotateLoop(Loop *L) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
-
+
+ BasicBlock *OrigHeader = L->getHeader();
+
+ BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
+ if (BI == 0 || BI->isUnconditional())
+ return false;
+
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
if (!L->isLoopExiting(OrigHeader))
return false;
- BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (!BI)
- return false;
- assert(BI->isConditional() && "Branch Instruction is not conditional");
-
// Updating PHInodes in loops with multiple exits adds complexity.
// Keep it simple, and restrict loop rotation to loops with one exit only.
// In future, lift this restriction and support for multiple exits if
@@ -136,24 +173,18 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
if (ExitBlocks.size() > 1)
return false;
- // Check size of original header and reject
- // loop if it is very big.
- unsigned Size = 0;
-
- // FIXME: Use common api to estimate size.
- for (BasicBlock::const_iterator OI = OrigHeader->begin(),
- OE = OrigHeader->end(); OI != OE; ++OI) {
- if (isa<PHINode>(OI))
- continue; // PHI nodes don't count.
- if (isa<DbgInfoIntrinsic>(OI))
- continue; // Debug intrinsics don't count as size.
- ++Size;
+ // Check size of original header and reject loop if it is very big.
+ {
+ CodeMetrics Metrics;
+ Metrics.analyzeBasicBlock(OrigHeader);
+ if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ return false;
}
- if (Size > MAX_HEADER_SIZE)
- return false;
-
// Now, this loop is suitable for rotation.
+ BasicBlock *OrigPreheader = L->getLoopPreheader();
+ BasicBlock *OrigLatch = L->getLoopLatch();
+ assert(OrigPreheader && OrigLatch && "Loop not in canonical form?");
// Anything ScalarEvolution may know about this loop or the PHI nodes
// in its header will soon be invalidated.
@@ -163,8 +194,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Find new Loop header. NewHeader is a Header's one and only successor
// that is inside loop. Header's other successor is outside the
// loop. Otherwise loop is not suitable for rotation.
- Exit = BI->getSuccessor(0);
- NewHeader = BI->getSuccessor(1);
+ BasicBlock *Exit = BI->getSuccessor(0);
+ BasicBlock *NewHeader = BI->getSuccessor(1);
if (L->contains(Exit))
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
@@ -180,20 +211,54 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// Begin by walking OrigHeader and populating ValueMap with an entry for
// each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- DenseMap<const Value *, Value *> ValueMap;
+ ValueToValueMapTy ValueMap;
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
+ ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
- // For the rest of the instructions, create a clone in the OldPreHeader.
- TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator();
- for (; I != E; ++I) {
- Instruction *C = I->clone();
- C->setName(I->getName());
- C->insertBefore(LoopEntryBranch);
- ValueMap[I] = C;
+ // For the rest of the instructions, either hoist to the OrigPreheader if
+ // possible or create a clone in the OldPreHeader if not.
+ TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
+ while (I != E) {
+ Instruction *Inst = I++;
+
+ // If the instruction's operands are invariant and it doesn't read or write
+ // memory, then it is safe to hoist. Doing this doesn't change the order of
+ // execution in the preheader, but does prevent the instruction from
+ // executing in each iteration of the loop. This means it is safe to hoist
+ // something that might trap, but isn't safe to hoist something that reads
+ // memory (without proving that the loop doesn't write).
+ if (L->hasLoopInvariantOperands(Inst) &&
+ !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ Inst->moveBefore(LoopEntryBranch);
+ continue;
+ }
+
+ // Otherwise, create a duplicate of the instruction.
+ Instruction *C = Inst->clone();
+
+ // Eagerly remap the operands of the instruction.
+ RemapInstruction(C, ValueMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+
+ // With the operands remapped, see if the instruction constant folds or is
+ // otherwise simplifiable. This commonly occurs because the entry from PHI
+ // nodes allows icmps and other instructions to fold.
+ Value *V = SimplifyInstruction(C);
+ if (V && LI->replacementPreservesLCSSAForm(C, V)) {
+ // If so, then delete the temporary instruction and stick the folded value
+ // in the map.
+ delete C;
+ ValueMap[Inst] = V;
+ } else {
+ // Otherwise, stick the new instruction into the new block!
+ C->setName(Inst->getName());
+ C->insertBefore(LoopEntryBranch);
+ ValueMap[Inst] = C;
+ }
}
// Along with all the other instructions, we just cloned OrigHeader's
@@ -203,221 +268,81 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader);
+ PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
// Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
// OrigPreHeader's old terminator (the original branch into the loop), and
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
- for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader));
- // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
- // as necessary.
- SSAUpdater SSA;
- for (I = OrigHeader->begin(); I != E; ++I) {
- Value *OrigHeaderVal = I;
- Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
-
- // The value now exists in two versions: the initial value in the preheader
- // and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
- SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
- SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
-
- // Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end(); UI != UE; ) {
- // Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
-
- // Increment the iterator before removing the use from the list.
- ++UI;
-
- // SSAUpdater can't handle a non-PHI use in the same block as an
- // earlier def. We can easily handle those cases manually.
- Instruction *UserInst = cast<Instruction>(U.getUser());
- if (!isa<PHINode>(UserInst)) {
- BasicBlock *UserBB = UserInst->getParent();
-
- // The original users in the OrigHeader are already using the
- // original definitions.
- if (UserBB == OrigHeader)
- continue;
-
- // Users in the OrigPreHeader need to use the value to which the
- // original definitions are mapped.
- if (UserBB == OrigPreHeader) {
- U = OrigPreHeaderVal;
- continue;
- }
- }
-
- // Anything else can be handled by SSAUpdater.
- SSA.RewriteUse(U);
- }
- }
+ // If there were any uses of instructions in the duplicated block outside the
+ // loop, update them, inserting PHI nodes as required
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ assert(L->getHeader() == NewHeader && "Latch block is our new header");
- // Move the original header to the bottom of the loop, where it now more
- // naturally belongs. This isn't necessary for correctness, and CodeGen can
- // usually reorder blocks on its own to fix things like this up, but it's
- // still nice to keep the IR readable.
- //
- // The original header should have only one predecessor at this point, since
- // we checked that the loop had a proper preheader and unique backedge before
- // we started.
- assert(OrigHeader->getSinglePredecessor() &&
- "Original loop header has too many predecessors after loop rotation!");
- OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
-
- // Also, since this original header only has one predecessor, zap its
- // PHI nodes, which are now trivial.
- FoldSingleEntryPHINodes(OrigHeader);
-
- // TODO: We could just go ahead and merge OrigHeader into its predecessor
- // at this point, if we don't mind updating dominator info.
-
- // Establish a new preheader, update dominators, etc.
- preserveCanonicalLoopForm(LPM);
-
- ++NumRotated;
- return true;
-}
-
-/// Initialize local data
-void LoopRotate::initialize() {
- L = NULL;
- OrigHeader = NULL;
- OrigPreHeader = NULL;
- NewHeader = NULL;
- Exit = NULL;
-}
-
- /// After loop rotation, loop pre-header has multiple successors.
-/// Insert one forwarding basic block to ensure that loop pre-header
-/// has only one successor.
-void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
-
- // Right now original pre-header has two successors, new header and
- // exit block. Insert new block between original pre-header and
- // new header such that loop's new pre-header has only one successor.
- BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(),
- "bb.nph",
- OrigHeader->getParent(),
- NewHeader);
- LoopInfo &LI = getAnalysis<LoopInfo>();
- if (Loop *PL = LI.getLoopFor(OrigPreHeader))
- PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
- BranchInst::Create(NewHeader, NewPreHeader);
- BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator());
- if (OrigPH_BI->getSuccessor(0) == NewHeader)
- OrigPH_BI->setSuccessor(0, NewPreHeader);
- else {
- assert(OrigPH_BI->getSuccessor(1) == NewHeader &&
- "Unexpected original pre-header terminator");
- OrigPH_BI->setSuccessor(1, NewPreHeader);
- }
-
- PHINode *PN;
- for (BasicBlock::iterator I = NewHeader->begin();
- (PN = dyn_cast<PHINode>(I)); ++I) {
- int index = PN->getBasicBlockIndex(OrigPreHeader);
- assert(index != -1 && "Expected incoming value from Original PreHeader");
- PN->setIncomingBlock(index, NewPreHeader);
- assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 &&
- "Expected only one incoming value from Original PreHeader");
- }
-
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- DT->addNewBlock(NewPreHeader, OrigPreHeader);
- DT->changeImmediateDominator(L->getHeader(), NewPreHeader);
- DT->changeImmediateDominator(Exit, OrigPreHeader);
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (L->getHeader() != B) {
- DomTreeNode *Node = DT->getNode(B);
- if (Node && Node->getBlock() == OrigHeader)
- DT->changeImmediateDominator(*BI, L->getHeader());
- }
- }
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
- }
-
- if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) {
- // New Preheader's dominance frontier is Exit block.
- DominanceFrontier::DomSetType NewPHSet;
- NewPHSet.insert(Exit);
- DF->addBasicBlock(NewPreHeader, NewPHSet);
-
- // New Header's dominance frontier now includes itself and Exit block
- DominanceFrontier::iterator HeadI = DF->find(L->getHeader());
- if (HeadI != DF->end()) {
- DominanceFrontier::DomSetType & HeaderSet = HeadI->second;
- HeaderSet.clear();
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType HeaderSet;
- HeaderSet.insert(L->getHeader());
- HeaderSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), HeaderSet);
- }
-
- // Original header (new Loop Latch)'s dominance frontier is Exit.
- DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch());
- if (LatchI != DF->end()) {
- DominanceFrontier::DomSetType &LatchSet = LatchI->second;
- LatchSet = LatchI->second;
- LatchSet.clear();
- LatchSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType LatchSet;
- LatchSet.insert(Exit);
- DF->addBasicBlock(L->getHeader(), LatchSet);
+ // At this point, we've finished our major CFG changes. As part of cloning
+ // the loop into the preheader we've simplified instructions and the
+ // duplicated conditional branch may now be branching on a constant. If it is
+ // branching on a constant and if that constant means that we enter the loop,
+ // then we fold away the cond branch to an uncond branch. This simplifies the
+ // loop in cases important for nested loops, and it also means we don't have
+ // to split as many edges.
+ BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
+ assert(PHBI->isConditional() && "Should be clone of BI condbr!");
+ if (!isa<ConstantInt>(PHBI->getCondition()) ||
+ PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
+ != NewHeader) {
+ // The conditional branch can't be folded, handle the general case.
+ // Update DominatorTree to reflect the CFG change we just made. Then split
+ // edges as necessary to preserve LoopSimplify form.
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ // Since OrigPreheader now has the conditional branch to Exit block, it is
+ // the dominator of Exit.
+ DT->changeImmediateDominator(Exit, OrigPreheader);
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
-
- // If a loop block dominates new loop latch then add to its frontiers
- // new header and Exit and remove new latch (which is equal to original
- // header).
- BasicBlock *NewLatch = L->getLoopLatch();
-
- assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader");
-
+
+ // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
+ // thus is not a preheader anymore. Split the edge to form a real preheader.
+ BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
+ NewPH->setName(NewHeader->getName() + ".lr.ph");
+
+ // Preserve canonical loop form, which means that 'Exit' should have only one
+ // predecessor.
+ BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
+ ExitSplit->moveBefore(Exit);
+ } else {
+ // We can fold the conditional branch in the preheader; this makes things
+ // simpler. The first step is to remove the extra edge to the Exit block.
+ Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
+ BranchInst::Create(NewHeader, PHBI);
+ PHBI->eraseFromParent();
+
+ // With our CFG finalized, update DomTree if it is available.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
- for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
- BI != BE; ++BI) {
- BasicBlock *B = *BI;
- if (DT->dominates(B, NewLatch)) {
- DominanceFrontier::iterator BDFI = DF->find(B);
- if (BDFI != DF->end()) {
- DominanceFrontier::DomSetType &BSet = BDFI->second;
- BSet.erase(NewLatch);
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- } else {
- DominanceFrontier::DomSetType BSet;
- BSet.insert(L->getHeader());
- BSet.insert(Exit);
- DF->addBasicBlock(B, BSet);
- }
- }
- }
+ // Update OrigHeader to be dominated by the new header block.
+ DT->changeImmediateDominator(NewHeader, OrigPreheader);
+ DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
}
-
- // Preserve canonical loop form, which means Exit block should
- // have only one predecessor.
- SplitEdge(L->getLoopLatch(), Exit, this);
-
- assert(NewHeader && L->getHeader() == NewHeader &&
- "Invalid loop header after loop rotation");
- assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader &&
- "Invalid loop preheader after loop rotation");
- assert(L->getLoopLatch() &&
- "Invalid loop latch after loop rotation");
+
+ assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
+ assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
+
+ // Now that the CFG and DomTree are in a consistent state again, try to merge
+ // the OrigHeader block into OrigLatch. This will succeed if they are
+ // connected by an unconditional branch. This is just a cleanup so the
+ // emitted code isn't too gross in this common case.
+ MergeBlockIntoPredecessor(OrigHeader, this);
+
+ ++NumRotated;
+ return true;
}
+
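The fold in the rotated preheader above relies on the successor-indexing idiom getSuccessor(Cond->isZero()): for a two-way conditional branch, successor 0 is the target taken when the condition is true and successor 1 when it is false, so indexing by "condition is zero" selects the block that will actually execute. A minimal standalone sketch of that idiom (plain C++, not LLVM code; the Block and CondBranch types are hypothetical stand-ins):

#include <cassert>
#include <cstdio>

struct Block { const char *Name; };

// Succ[0] is taken when the condition is true, Succ[1] when it is false,
// mirroring how a two-way conditional branch orders its successors.
struct CondBranch {
  bool Condition;
  Block *Succ[2];
};

int main() {
  Block NewHeader = {"new.header"};
  Block Exit      = {"exit"};
  // The cloned branch in the preheader, with its condition folded to 'true'.
  CondBranch PHBI = { true, { &NewHeader, &Exit } };

  // Equivalent of PHBI->getSuccessor(Cond->isZero()): index by "condition is
  // zero" to obtain the successor that will actually execute.
  Block *Taken = PHBI.Succ[PHBI.Condition ? 0 : 1];
  assert(Taken == PHBI.Succ[!PHBI.Condition]);

  // If the taken successor is the new header, the conditional branch can be
  // replaced by an unconditional branch into the rotated loop.
  std::printf("taken: %s (%s)\n", Taken->Name,
              Taken == &NewHeader ? "fold to unconditional" : "keep conditional");
  return 0;
}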
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e8dc5d3..ac4aea2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -63,6 +63,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -113,7 +114,7 @@ class RegUseTracker {
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
- void DropUse(size_t LUIdx);
+ void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -152,11 +153,19 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
}
void
-RegUseTracker::DropUse(size_t LUIdx) {
- // Remove the use index from every register's use list.
+RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
+ assert(LUIdx <= LastLUIdx);
+
+ // Update RegUses. The data structure is not optimized for this purpose;
+ // we must iterate through it and update each of the bit vectors.
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
- I != E; ++I)
- I->second.UsedByIndices.reset(LUIdx);
+ I != E; ++I) {
+ SmallBitVector &UsedByIndices = I->second.UsedByIndices;
+ if (LUIdx < UsedByIndices.size())
+ UsedByIndices[LUIdx] =
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
+ }
}
bool
@@ -202,8 +211,7 @@ struct Formula {
Formula() : ScaledReg(0) {}
- void InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
unsigned getNumRegs() const;
const Type *getType() const;
@@ -224,9 +232,9 @@ struct Formula {
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE) {
// Collect expressions which properly dominate the loop header.
- if (S->properlyDominates(L->getHeader(), &DT)) {
+ if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
@@ -235,18 +243,18 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- DoInitialMatch(*I, L, Good, Bad, SE, DT);
+ DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
- DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT);
+ DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- L, Good, Bad, SE, DT);
+ L, Good, Bad, SE);
return;
}
@@ -258,7 +266,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
- DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT);
+ DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
@@ -278,11 +286,10 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
-void Formula::InitialMatch(const SCEV *S, Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
+void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
- DoInitialMatch(S, L, Good, Bad, SE, DT);
+ DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
@@ -608,7 +615,7 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
+ Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -645,8 +652,6 @@ public:
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0) {}
- unsigned getNumRegs() const { return NumRegs; }
-
bool operator<(const Cost &Other) const;
void Loose();
@@ -722,6 +727,9 @@ void Cost::RateRegister(const SCEV *Reg,
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
+
+ NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+ SE.hasComputableLoopEvolution(Reg, L);
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
@@ -756,9 +764,6 @@ void Cost::RateFormula(const Formula &F,
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
-
- NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
- BaseReg->hasComputableLoopEvolution(L);
}
if (F.BaseRegs.size() > 1)
@@ -1257,32 +1262,6 @@ struct UseMapDenseMapInfo {
}
};
-/// FormulaSorter - This class implements an ordering for formulae which sorts
-/// the by their standalone cost.
-class FormulaSorter {
- /// These two sets are kept empty, so that we compute standalone costs.
- DenseSet<const SCEV *> VisitedRegs;
- SmallPtrSet<const SCEV *, 16> Regs;
- Loop *L;
- LSRUse *LU;
- ScalarEvolution &SE;
- DominatorTree &DT;
-
-public:
- FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
- : L(l), LU(&lu), SE(se), DT(dt) {}
-
- bool operator()(const Formula &A, const Formula &B) {
- Cost CostA;
- CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- Cost CostB;
- CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
- Regs.clear();
- return CostA < CostB;
- }
-};
-
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
@@ -1341,7 +1320,7 @@ class LSRInstance {
LSRUse::KindType Kind,
const Type *AccessTy);
- void DeleteUse(LSRUse &LU);
+ void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
@@ -1925,10 +1904,13 @@ LSRInstance::getUse(const SCEV *&Expr,
}
/// DeleteUse - Delete the given use from the Uses list.
-void LSRInstance::DeleteUse(LSRUse &LU) {
+void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
+
+ // Update RegUses.
+ RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has

@@ -2073,7 +2055,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (N->isLoopInvariant(L)) {
+ if (SE.isLoopInvariant(N, L)) {
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -2113,7 +2095,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
- F.InitialMatch(S, L, SE, DT);
+ F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
@@ -2213,7 +2195,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
- if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
+ if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
@@ -2296,7 +2278,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Don't pull a constant into a register if the constant could be folded
@@ -2347,8 +2329,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
- if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
- !BaseReg->hasComputableLoopEvolution(L))
+ if (SE.properlyDominates(BaseReg, L->getHeader()) &&
+ !SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
@@ -2813,9 +2795,11 @@ LSRInstance::GenerateAllReuseFormulae() {
print_uses(dbgs()));
}
-/// If their are multiple formulae with the same set of registers used
+/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+ DenseSet<const SCEV *> VisitedRegs;
+ SmallPtrSet<const SCEV *, 16> Regs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2828,7 +2812,6 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
- FormulaSorter Sorter(L, LU, SE, DT);
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
@@ -2854,7 +2837,14 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
BestFormulae.insert(std::make_pair(Key, FIdx));
if (!P.second) {
Formula &Best = LU.Formulae[P.first->second];
- if (Sorter.operator()(F, Best))
+
+ Cost CostF;
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ Cost CostBest;
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
+ Regs.clear();
+ if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -2894,7 +2884,7 @@ static const size_t ComplexityLimit = UINT16_MAX;
/// this many solutions because it prunes the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
- uint32_t Power = 1;
+ size_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
@@ -3001,6 +2991,28 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
+ }
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -3019,22 +3031,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.AM.BaseOffs;
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
-
// Delete the old use.
- DeleteUse(LU);
+ DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
@@ -3546,21 +3544,23 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
+ !isa<IndirectBrInst>(BB->getTerminator())) {
+ Loop *PNLoop = LI.getLoopFor(PN->getParent());
+ if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
@@ -3792,21 +3792,30 @@ private:
}
char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
- "Loop Strength Reduction", false, false);
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(IVUsers)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {
+ initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+ }
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved("domfrontier");
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
@@ -3815,6 +3824,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ // Requiring LoopSimplify a second time here prevents IVUsers from running
+ // twice, since LoopSimplify was invalidated by running ScalarEvolution.
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
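The new RegUseTracker::SwapAndDropUse above mirrors the swap-with-last deletion performed in DeleteUse: after use LUIdx is swapped with the final use and popped, every register's use bit vector must copy bit LastLUIdx into slot LUIdx and then shrink by one. A minimal standalone sketch of that update, assuming each register simply keeps a vector<bool> of use indices (plain C++, not the LLVM SmallBitVector machinery):

#include <algorithm>
#include <cstdio>
#include <vector>

// Move the last use's bit into the slot of the deleted use, then drop the
// now-dead last slot, for every register's use list.
static void swapAndDropUse(std::vector<std::vector<bool> > &UsedByIndices,
                           size_t LUIdx, size_t LastLUIdx) {
  for (size_t r = 0; r != UsedByIndices.size(); ++r) {
    std::vector<bool> &Bits = UsedByIndices[r];
    if (LUIdx < Bits.size())
      Bits[LUIdx] = LastLUIdx < Bits.size() ? bool(Bits[LastLUIdx]) : false;
    Bits.resize(std::min(Bits.size(), LastLUIdx));
  }
}

int main() {
  // Two registers, three uses: register 0 is used by uses 0 and 2,
  // register 1 is used by use 1.
  std::vector<std::vector<bool> > Regs(2, std::vector<bool>(3, false));
  Regs[0][0] = Regs[0][2] = true;
  Regs[1][1] = true;

  // Delete use 0 by swapping it with the last use (index 2).
  swapAndDropUse(Regs, 0, 2);

  for (size_t r = 0; r != Regs.size(); ++r) {
    std::printf("reg %u:", unsigned(r));
    for (size_t u = 0; u != Regs[r].size(); ++u)
      std::printf(" %d", int(Regs[r][u]));
    std::printf("\n");
  }
  return 0;
}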
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d0edfa2..80b263a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,7 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -27,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -43,12 +43,20 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(ID) {}
+ LoopUnroll() : LoopPass(ID) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
+
+ // Threshold to use when optsize is specified (and there is no
+ // explicit -unroll-threshold).
+ static const unsigned OptSizeUnrollThreshold = 50;
+
+ unsigned CurrentThreshold;
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -73,7 +81,11 @@ namespace {
}
char LoopUnroll::ID = 0;
-INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -83,8 +95,16 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I);
- NumCalls = Metrics.NumCalls;
- return Metrics.NumInsts;
+ NumCalls = Metrics.NumInlineCandidates;
+
+ unsigned LoopSize = Metrics.NumInsts;
+
+ // Don't allow an estimate of size zero. This would allow unrolling of loops
+ // with huge iteration counts, which is a compile time problem even if it's
+ // not a problem for code quality.
+ if (LoopSize == 0) LoopSize = 1;
+
+ return LoopSize;
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -94,6 +114,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
+
+ // Determine the current unrolling threshold. While this is normally set
+ // from UnrollThreshold, it is overridden to a smaller value if the current
+ // function is marked as optimize-for-size, and the unroll threshold was
+ // not user specified.
+ CurrentThreshold = UnrollThreshold;
+ if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) &&
+ UnrollThreshold.getNumOccurrences() == 0)
+ CurrentThreshold = OptSizeUnrollThreshold;
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
@@ -111,25 +140,25 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned NumCalls;
- unsigned LoopSize = ApproximateLoopSize(L, NumCalls);
+ if (CurrentThreshold != NoThreshold) {
+ unsigned NumInlineCandidates;
+ unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- if (NumCalls != 0) {
- DEBUG(dbgs() << " Not unrolling loop with function calls.\n");
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
+ if (TripCount != 1 && Size > CurrentThreshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
+ << " because size: " << Size << ">" << CurrentThreshold << "\n");
if (!UnrollAllowPartial) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
+ Count = CurrentThreshold / LoopSize;
while (Count != 0 && TripCount%Count != 0) {
Count--;
}
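The threshold handling above reduces the unroll count for partial unrolling to CurrentThreshold / LoopSize and then walks it down until it evenly divides the trip count. A minimal standalone sketch of that computation (plain C++; the example numbers are made up):

#include <cstdio>

static unsigned partialUnrollCount(unsigned TripCount, unsigned LoopSize,
                                   unsigned Threshold) {
  if (LoopSize == 0) LoopSize = 1;        // never allow a size-zero estimate
  unsigned Count = Threshold / LoopSize;  // largest count that fits the budget
  while (Count != 0 && TripCount % Count != 0)
    --Count;                              // make the count divide the trip count
  return Count;                           // 0 means "do not unroll"
}

int main() {
  // Trip count 100, body of ~12 instructions, threshold 150:
  // 150/12 = 12, then reduced to 10, the largest divisor of 100 not above 12.
  std::printf("count = %u\n", partialUnrollCount(100, 12, 150));
  return 0;
}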
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9afe428..b4e3d31 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,12 +32,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -93,7 +93,9 @@ namespace {
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(NULL), DT(NULL), loopHeader(NULL),
- loopPreheader(NULL) {}
+ loopPreheader(NULL) {
+ initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
+ }
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
@@ -109,6 +111,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
+ AU.addPreserved<ScalarEvolution>();
}
private:
@@ -158,7 +161,13 @@ namespace {
};
}
char LoopUnswitch::ID = 0;
-INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
+INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
+ false, false)
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -450,22 +459,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
return true;
}
-// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by VMap.
-//
-static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
- for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
- Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
- if (It != VMap.end()) Op = It->second;
- I->setOperand(op, Op);
- }
-}
-
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -580,6 +576,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
<< " blocks] in Function " << F->getName()
<< " when '" << *Val << "' == " << *LIC << "\n");
+ if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
LoopBlocks.clear();
NewBlocks.clear();
@@ -609,7 +608,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
@@ -647,7 +646,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
@@ -657,7 +656,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
@@ -961,13 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
-
- // Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I)) {
- ReplaceUsesOfWith(I, C, Worklist, L, LPM);
- continue;
- }
-
+
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
@@ -982,15 +975,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I)) {
- ReplaceUsesOfWith(I, V, Worklist, L, LPM);
- continue;
- }
-
+ if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (LI->replacementPreservesLCSSAForm(I, V)) {
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ continue;
+ }
+
// Special case hacks that appear commonly in unswitched code.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
if (BI->isUnconditional()) {
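The cloning path above now calls the shared RemapInstruction utility with RF_NoModuleLevelChanges|RF_IgnoreMissingEntries instead of the deleted local helper. Conceptually the step is simple: for each operand of a cloned instruction, look it up in the old-to-new value map and substitute the clone if an entry exists, leaving values defined outside the cloned region untouched. A minimal standalone sketch of that remapping, using std::string as a stand-in for Value* (plain C++, not LLVM code):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

typedef std::string Value;  // stands in for llvm::Value*

// Replace every operand that has an entry in the old->new map with its clone;
// operands with no entry are kept as-is, which is what ignoring missing
// entries amounts to in this sketch.
static void remapOperands(std::vector<Value> &Operands,
                          const std::map<Value, Value> &VMap) {
  for (size_t i = 0; i != Operands.size(); ++i) {
    std::map<Value, Value>::const_iterator It = VMap.find(Operands[i]);
    if (It != VMap.end())
      Operands[i] = It->second;
  }
}

int main() {
  std::map<Value, Value> VMap;
  VMap["%x"] = "%x.us";            // value cloned into the ".us" loop copy

  std::vector<Value> Ops;          // operands of one cloned instruction
  Ops.push_back("%x");             // defined inside the cloned region
  Ops.push_back("%inv");           // defined outside it; no map entry

  remapOperands(Ops, VMap);
  for (size_t i = 0; i != Ops.size(); ++i)
    std::printf("operand %u: %s\n", unsigned(i), Ops[i].c_str());
  return 0;
}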
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index 973ffe7..9087b46 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -14,26 +14,15 @@
#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
#include "llvm/Function.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/IRBuilder.h"
-
using namespace llvm;
-namespace {
-
-bool LowerAtomicIntrinsic(CallInst *CI) {
- IRBuilder<> Builder(CI->getParent(), CI);
-
- Function *Callee = CI->getCalledFunction();
- if (!Callee)
- return false;
-
- unsigned IID = Callee->getIntrinsicID();
+static bool LowerAtomicIntrinsic(IntrinsicInst *II) {
+ IRBuilder<> Builder(II->getParent(), II);
+ unsigned IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::memory_barrier:
break;
@@ -48,80 +37,70 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
case Intrinsic::atomic_load_min:
case Intrinsic::atomic_load_umax:
case Intrinsic::atomic_load_umin: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Delta = CI->getArgOperand(1);
+ Value *Ptr = II->getArgOperand(0), *Delta = II->getArgOperand(1);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Res = NULL;
switch (IID) {
- default: assert(0 && "Unrecognized atomic modify operation");
- case Intrinsic::atomic_load_add:
- Res = Builder.CreateAdd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_sub:
- Res = Builder.CreateSub(Orig, Delta);
- break;
- case Intrinsic::atomic_load_and:
- Res = Builder.CreateAnd(Orig, Delta);
- break;
- case Intrinsic::atomic_load_nand:
- Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
- break;
- case Intrinsic::atomic_load_or:
- Res = Builder.CreateOr(Orig, Delta);
- break;
- case Intrinsic::atomic_load_xor:
- Res = Builder.CreateXor(Orig, Delta);
- break;
- case Intrinsic::atomic_load_max:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Delta,
- Orig);
- break;
- case Intrinsic::atomic_load_min:
- Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
- Orig,
- Delta);
- break;
- case Intrinsic::atomic_load_umax:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Delta,
- Orig);
- break;
- case Intrinsic::atomic_load_umin:
- Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
- Orig,
- Delta);
- break;
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta, Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig, Delta);
+ break;
}
Builder.CreateStore(Res, Ptr);
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
case Intrinsic::atomic_swap: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Val = CI->getArgOperand(1);
-
+ Value *Ptr = II->getArgOperand(0), *Val = II->getArgOperand(1);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Builder.CreateStore(Val, Ptr);
-
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
case Intrinsic::atomic_cmp_swap: {
- Value *Ptr = CI->getArgOperand(0);
- Value *Cmp = CI->getArgOperand(1);
- Value *Val = CI->getArgOperand(2);
+ Value *Ptr = II->getArgOperand(0), *Cmp = II->getArgOperand(1);
+ Value *Val = II->getArgOperand(2);
LoadInst *Orig = Builder.CreateLoad(Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
-
- CI->replaceAllUsesWith(Orig);
+ II->replaceAllUsesWith(Orig);
break;
}
@@ -129,33 +108,32 @@ bool LowerAtomicIntrinsic(CallInst *CI) {
return false;
}
- assert(CI->use_empty() &&
+ assert(II->use_empty() &&
"Lowering should have eliminated any uses of the intrinsic call!");
- CI->eraseFromParent();
+ II->eraseFromParent();
return true;
}
-struct LowerAtomic : public BasicBlockPass {
- static char ID;
- LowerAtomic() : BasicBlockPass(ID) {}
- bool runOnBasicBlock(BasicBlock &BB) {
- bool Changed = false;
- for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
- Instruction *Inst = DI++;
- if (CallInst *CI = dyn_cast<CallInst>(Inst))
- Changed |= LowerAtomicIntrinsic(CI);
+namespace {
+ struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {
+ initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
}
- return Changed;
- }
-
-};
-
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; )
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DI++))
+ Changed |= LowerAtomicIntrinsic(II);
+ return Changed;
+ }
+ };
}
char LowerAtomic::ID = 0;
INITIALIZE_PASS(LowerAtomic, "loweratomic",
"Lower atomic intrinsics to non-atomic form",
- false, false);
+ false, false)
Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
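The switch in LowerAtomicIntrinsic above lowers each atomic intrinsic to a plain load/compute/store sequence, with the originally loaded value replacing all uses of the call. A minimal standalone sketch of the atomic_cmp_swap case (plain C++; it deliberately ignores atomicity, which is exactly the assumption this pass makes about its callers):

#include <cstdio>

// Non-atomic lowering of a compare-and-swap: load, compare, select, store,
// and return the value that was in memory before the operation.
static int lowerCmpSwap(int *Ptr, int Cmp, int Val) {
  int Orig = *Ptr;              // LoadInst
  bool Equal = (Orig == Cmp);   // ICmpEQ
  int Res = Equal ? Val : Orig; // Select
  *Ptr = Res;                   // StoreInst
  return Orig;                  // replaces all uses of the intrinsic call
}

int main() {
  int X = 7;
  int Old = lowerCmpSwap(&X, 7, 42);  // matches: 42 is stored
  std::printf("old=%d new=%d\n", Old, X);
  Old = lowerCmpSwap(&X, 7, 99);      // no match: value left unchanged
  std::printf("old=%d new=%d\n", Old, X);
  return 0;
}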
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 24fae42..bde0e53 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,16 +14,18 @@
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include <list>
@@ -32,62 +34,10 @@ using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
-
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
- LLVMContext &Context = V->getContext();
-
- // All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
-
- // Constant float and double values can be handled as integer values if the
- // corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));
- // Don't handle long double formats, which have strange constraints.
- }
-
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2.trunc(Val.getBitWidth()/2);
- Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return 0;
- }
- return ConstantInt::get(Context, Val);
- }
- }
-
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
- return 0;
-}
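The helper removed above is now taken from llvm/Analysis/ValueTracking.h (note the ValueTracking.h include added earlier in this file's diff); its byte-splat test halves the constant repeatedly and requires the two halves to match at every level. A minimal standalone sketch of that test on a raw uint64_t (plain C++, not the APInt-based LLVM version):

#include <cstdint>
#include <cstdio>

// Returns true and sets Byte if Val, of width Bits (a power of two in [8,64]),
// is a repetition of a single byte, e.g. 0x00000000, 0xFFFF, 0xA0A0A0A0.
static bool splatByte(uint64_t Val, unsigned Bits, uint8_t &Byte) {
  while (Bits != 8) {
    unsigned Half = Bits / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Lo = Val & Mask, Hi = (Val >> Half) & Mask;
    if (Lo != Hi)
      return false;   // top and bottom halves differ: not byte-splattable
    Val = Lo;
    Bits = Half;
  }
  Byte = uint8_t(Val);
  return true;
}

int main() {
  uint8_t B = 0;
  std::printf("0xA0A0A0A0 -> %d\n", int(splatByte(0xA0A0A0A0ULL, 32, B))); // 1
  std::printf("0x1234     -> %d\n", int(splatByte(0x1234ULL, 16, B)));     // 0
  return 0;
}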
+STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
- bool &VariableIdxFound, TargetData &TD) {
+ bool &VariableIdxFound, const TargetData &TD){
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -120,14 +70,31 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- TargetData &TD) {
+ const TargetData &TD) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+ GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+ GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+
+ bool VariableIdxFound = false;
+
+ // If one pointer is a GEP and the other isn't, then see if the GEP is a
+ // constant offset from the base, as in "P" and "gep P, 1".
+ if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
+ if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ return !VariableIdxFound;
+ }
+
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
// offset, which determines their offset from each other. At this point, we
// handle no other case.
- GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
- GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
@@ -137,7 +104,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- bool VariableIdxFound = false;
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
@@ -171,7 +137,7 @@ struct MemsetRange {
unsigned Alignment;
/// TheStores - The actual stores that make up this range.
- SmallVector<StoreInst*, 16> TheStores;
+ SmallVector<Instruction*, 16> TheStores;
bool isProfitableToUseMemset(const TargetData &TD) const;
@@ -181,10 +147,19 @@ struct MemsetRange {
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
// If we found more than 8 stores to merge or 64 bytes, use memset.
if (TheStores.size() >= 8 || End-Start >= 64) return true;
+
+ // If there is nothing to merge, don't do anything.
+ if (TheStores.size() < 2) return false;
+
+ // If any of the stores are a memset, then it is always good to extend the
+ // memset.
+ for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
+ if (!isa<StoreInst>(TheStores[i]))
+ return true;
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
- if (TheStores.size() <= 2) return false;
+ if (TheStores.size() == 2) return false;
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -215,31 +190,53 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- TargetData &TD;
+ const TargetData &TD;
public:
- MemsetRanges(TargetData &td) : TD(td) {}
+ MemsetRanges(const TargetData &td) : TD(td) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
const_iterator end() const { return Ranges.end(); }
bool empty() const { return Ranges.empty(); }
- void addStore(int64_t OffsetFromFirst, StoreInst *SI);
+ void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ addStore(OffsetFromFirst, SI);
+ else
+ addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
+ }
+
+ void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
+ int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+ addRange(OffsetFromFirst, StoreSize,
+ SI->getPointerOperand(), SI->getAlignment(), SI);
+ }
+
+ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
+ int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+ }
+
+ void addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst);
+
};
} // end anon namespace
-/// addStore - Add a new store to the MemsetRanges data structure. This adds a
+/// addRange - Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
-void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
- int64_t End = Start+TD.getTypeStoreSize(SI->getOperand(0)->getType());
-
- // Do a linear search of the ranges to see if this can be joined and/or to
- // find the insertion point in the list. We keep the ranges sorted for
- // simplicity here. This is a linear search of a linked list, which is ugly,
- // however the number of ranges is limited, so this won't get crazy slow.
+///
+/// Do a linear search of the ranges to see if this can be joined and/or to
+/// find the insertion point in the list. We keep the ranges sorted for
+/// simplicity here. This is a linear search of a linked list, which is ugly,
+/// however the number of ranges is limited, so this won't get crazy slow.
+void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
+ unsigned Alignment, Instruction *Inst) {
+ int64_t End = Start+Size;
range_iterator I = Ranges.begin(), E = Ranges.end();
while (I != E && Start > I->End)
@@ -252,14 +249,14 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
R.Start = Start;
R.End = End;
- R.StartPtr = SI->getPointerOperand();
- R.Alignment = SI->getAlignment();
- R.TheStores.push_back(SI);
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
+ R.TheStores.push_back(Inst);
return;
}
-
+
// This store overlaps with I, add it.
- I->TheStores.push_back(SI);
+ I->TheStores.push_back(Inst);
// At this point, we may have an interval that completely contains our store.
// If so, just add it to the interval and return.
@@ -274,8 +271,8 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
// stopped on *it*.
if (Start < I->Start) {
I->Start = Start;
- I->StartPtr = SI->getPointerOperand();
- I->Alignment = SI->getAlignment();
+ I->StartPtr = Ptr;
+ I->Alignment = Alignment;
}
// Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -301,10 +298,16 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
namespace {
class MemCpyOpt : public FunctionPass {
- bool runOnFunction(Function &F);
+ MemoryDependenceAnalysis *MD;
+ const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(ID) {}
+ MemCpyOpt() : FunctionPass(ID) {
+ initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
+ }
+
+ bool runOnFunction(Function &F);
private:
// This transformation requires dominator postdominator info
@@ -319,9 +322,17 @@ namespace {
// Helper fuctions
bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+ bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
bool processMemCpy(MemCpyInst *M);
bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
+ bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C);
+ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
+ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
+ Value *ByteVal);
+
bool iterateOnFunction(Function &F);
};
@@ -331,165 +342,199 @@ namespace {
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
-
-
+INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
+ false, false)
-/// processStore - When GVN is scanning forward over instructions, we look for
+/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consequtive ones
-/// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
-
- LLVMContext &Context = SI->getContext();
-
- // There are two cases that are interesting for this code to handle: memcpy
- // and memset. Right now we only handle memset.
+/// neighboring locations of memory. If it sees enough consecutive ones, it
+/// attempts to merge them together into a memcpy/memset.
+Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
+ Value *StartPtr, Value *ByteVal) {
+ if (TD == 0) return 0;
- // Ensure that the value being stored is something that can be memset'able a
- // byte at a time like "0" or "-1" or any width, as well as things like
- // 0xA0A0A0A0 and 0.0.
- Value *ByteVal = isBytewiseValue(SI->getOperand(0));
- if (!ByteVal)
- return false;
-
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- Module *M = SI->getParent()->getParent()->getParent();
-
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
MemsetRanges Ranges(*TD);
- Value *StartPtr = SI->getPointerOperand();
-
- BasicBlock::iterator BI = SI;
+ BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
- if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
- // If the call is readnone, ignore it, otherwise bail out. We don't even
- // allow readonly here because we don't want something like:
+ if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
+ // If the instruction is readnone, ignore it, otherwise bail out. We
+ // don't even allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite(BI)) ==
- AliasAnalysis::DoesNotAccessMemory)
- continue;
-
- // TODO: If this is a memset, try to join it in.
-
- break;
- } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
- break;
-
- // If this is a non-store instruction it is fine, ignore it.
- StoreInst *NextStore = dyn_cast<StoreInst>(BI);
- if (NextStore == 0) continue;
+ if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
+ break;
+ continue;
+ }
- // If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
+ // If this is a store, see if we can merge it in.
+ if (NextStore->isVolatile()) break;
- // Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
- break;
-
- // Check to see if this store is to a constant offset from the start ptr.
- int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
- break;
-
- Ranges.addStore(Offset, NextStore);
+ // Check to see if this stored value is of the same byte-splattable value.
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
+ Offset, *TD))
+ break;
+
+ Ranges.addStore(Offset, NextStore);
+ } else {
+ MemSetInst *MSI = cast<MemSetInst>(BI);
+
+ if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
+ !isa<ConstantInt>(MSI->getLength()))
+ break;
+
+ // Check to see if this store is to a constant offset from the start ptr.
+ int64_t Offset;
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ break;
+
+ Ranges.addMemSet(Offset, MSI);
+ }
}
-
+
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return false;
+ return 0;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
// interesting as a small compile-time optimization.
- Ranges.addStore(0, SI);
-
-
+ Ranges.addInst(0, StartInst);
+
+ // If we create any memsets, we put them right before the first instruction that
+ // isn't part of the memset block. This ensures that the memset is dominated
+ // by any addressing instruction needed by the start of the block.
+ IRBuilder<> Builder(BI);
+
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- bool MadeChange = false;
+ Instruction *AMemSet = 0;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
-
+
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(*TD))
continue;
- // Otherwise, we do want to transform this! Create a new memset. We put
- // the memset right before the first instruction that isn't part of this
- // memset block. This ensure that the memset is dominated by any addressing
- // instruction needed by the start of the block.
- BasicBlock::iterator InsertPt = BI;
-
+ // Otherwise, we do want to transform this! Create a new memset.
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
-
+
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
const Type *EltType =
- cast<PointerType>(StartPtr->getType())->getElementType();
+ cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
-
- // Cast the start ptr to be i8* as memset requires.
- const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
- const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
- StartPTy->getAddressSpace());
- if (StartPTy!= i8Ptr)
- StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
- InsertPt);
-
- Value *Ops[] = {
- StartPtr, ByteVal, // Start, value
- // size
- ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
- // align
- ConstantInt::get(Type::getInt32Ty(Context), Alignment),
- // volatile
- ConstantInt::get(Type::getInt1Ty(Context), 0),
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
-
- Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
-
- Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
+
+ AMemSet =
+ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
+
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i];
- dbgs() << "With: " << *C); C=C;
-
- // Don't invalidate the iterator
- BBI = BI;
-
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *AMemSet << '\n');
+
// Zap all the stores.
- for (SmallVector<StoreInst*, 16>::const_iterator
+ for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
- SE = Range.TheStores.end(); SI != SE; ++SI)
+ SE = Range.TheStores.end(); SI != SE; ++SI) {
+ MD->removeInstruction(*SI);
(*SI)->eraseFromParent();
+ }
++NumMemSetInfer;
- MadeChange = true;
}
- return MadeChange;
+ return AMemSet;
+}
+
+
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
+ if (SI->isVolatile()) return false;
+
+ if (TD == 0) return false;
+
+ // Detect cases where we're performing call slot forwarding, but
+ // happen to be using a load-store pair to implement it, rather than
+ // a memcpy.
+ if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
+ if (!LI->isVolatile() && LI->hasOneUse()) {
+ MemDepResult dep = MD->getDependency(LI);
+ CallInst *C = 0;
+ if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
+ C = dyn_cast<CallInst>(dep.getInst());
+
+ if (C) {
+ bool changed = performCallSlotOptzn(LI,
+ SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
+ if (changed) {
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ MD->removeInstruction(LI);
+ LI->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+ }
+
+ // There are two cases that are interesting for this code to handle: memcpy
+ // and memset. Right now we only handle memset.
+
+ // Ensure that the value being stored is something that can be memset'able a
+ // byte at a time like "0" or "-1" or any width, as well as things like
+ // 0xA0A0A0A0 and 0.0.
+ if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
+ ByteVal)) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+
+ return false;
+}
+
+bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
+ // See if there is another memset or store neighboring this memset which
+ // allows us to widen out the memset to do a single larger store.
+ if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
+ if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
+ MSI->getValue())) {
+ BBI = I; // Don't invalidate iterator.
+ return true;
+ }
+ return false;
}
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
-bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
+bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
+ Value *cpyDest, Value *cpySrc,
+ uint64_t cpyLen, CallInst *C) {
// The general transformation to keep in mind is
//
// call @func(..., src, ...)
@@ -506,24 +551,15 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Deliberately get the source and destination with bitcasts stripped away,
// because we'll need to do type comparisons based on the underlying type.
- Value *cpyDest = cpy->getDest();
- Value *cpySrc = cpy->getSource();
CallSite CS(C);
- // We need to be able to reason about the size of the memcpy, so we require
- // that it be a constant.
- ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
- if (!cpyLength)
- return false;
-
// Require that src be an alloca. This simplifies the reasoning considerably.
AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
if (!srcAlloca)
return false;
// Check that all of src is copied to dest.
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- if (!TD) return false;
+ if (TD == 0) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -532,7 +568,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLength->getZExtValue() < srcSize)
+ if (cpyLen < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
@@ -601,8 +637,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// the use analysis, we also need to know that it does not sneakily
// access dest. We rely on AA to figure this out for us.
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
- AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
return false;
// All the checks have passed, so do the transformation.
@@ -625,99 +660,142 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- MD.removeInstruction(C);
+ MD->removeInstruction(C);
- // Remove the memcpy
- MD.removeInstruction(cpy);
- cpy->eraseFromParent();
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
++NumMemCpyInstr;
return true;
}
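
As a rough source-level sketch of the call slot optimization (a hypothetical example, not taken from the patch): when a call fills a local temporary and the only use of that temporary is a memcpy into the real destination, the call can be made to write into the destination directly, killing both the temporary and the copy.

#include <cstring>

struct Big { char bytes[64]; };

void producer(Big *out);  // assumed to write all of *out and nothing else

// Before: the result is produced into a local and then copied out.
void before(Big *dest) {
  Big tmp;
  producer(&tmp);
  std::memcpy(dest, &tmp, sizeof(Big));
}

// After: the call writes straight into the destination. This is only legal
// when the call is known not to access dest in any other way, which is what
// the alias-analysis ModRef query above establishes.
void after(Big *dest) {
  producer(dest);
}
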
-/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
-/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
-/// B to be a memcpy from X to Z (or potentially a memmove, depending on
-/// circumstances). This allows later passes to remove the first memcpy
-/// altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // The are two possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- MemDepResult dep = MD.getDependency(M);
- if (!dep.isClobber())
- return false;
- if (!isa<MemCpyInst>(dep.getInst())) {
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
- return performCallSlotOptzn(M, C);
+/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
+/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
+/// copy from MDep's input if we can. MSize is the size of M's copy.
+///
+bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
+ uint64_t MSize) {
+  // We can only transform memcpy's where the dest of one is the source of the
+  // other.
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
- }
-
- MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
- // We can only transforms memcpy's where the dest of one is the source of the
- // other
- if (M->getSource() != MDep->getDest())
+  // If the dep instruction is reading from our current input, then it is a
+  // no-op transfer and substituting the input won't change this instruction.
+  // Just ignore the input and let someone else zap MDep. This handles cases like:
+ // memcpy(a <- a)
+ // memcpy(b <- a)
+ if (M->getSource() == MDep->getSource())
return false;
  // Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
- if (!C1 || !C2)
- return false;
-
- uint64_t DepSize = C1->getValue().getZExtValue();
- uint64_t CpySize = C2->getValue().getZExtValue();
-
- if (DepSize < CpySize)
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
return false;
- // Finally, we have to make sure that the dest of the second does not
- // alias the source of the first
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
- AliasAnalysis::NoAlias)
+
+ // Verify that the copied-from memory doesn't change in between the two
+ // transfers. For example, in:
+ // memcpy(a <- b)
+ // *b = 42;
+ // memcpy(c <- a)
+ // It would be invalid to transform the second memcpy into memcpy(c <- b).
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
+ false, M, M->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
return false;
- else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
- AliasAnalysis::NoAlias)
+
+ // If the dest of the second might alias the source of the first, then the
+ // source and dest might overlap. We still want to eliminate the intermediate
+ // value, but we have to generate a memmove instead of memcpy.
+ bool UseMemMove = false;
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
+ UseMemMove = true;
+
+ // If all checks passed, then we can transform M.
+
+ // Make sure to use the lesser of the alignment of the source and the dest
+ // since we're changing where we're reading from, but don't want to increase
+ // the alignment past what can be read from or written to.
+ // TODO: Is this worth it if we're creating a less aligned memcpy? For
+ // example we could be moving from movaps -> movq on x86.
+ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
+
+ IRBuilder<> Builder(M);
+ if (UseMemMove)
+ Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+ else
+ Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
+ Align, M->isVolatile());
+
+ // Remove the instruction we're replacing.
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+}
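
In source terms, the memcpy-memcpy forwarding performed by processMemCpyMemCpyDependence looks roughly like this hypothetical example; if the destination of the second copy may overlap the source of the first, a memmove is emitted instead.

#include <cstddef>
#include <cstring>

// Before: b is only an intermediate hop between a and c.
void before(char *c, char *b, const char *a, std::size_t n) {
  std::memcpy(b, a, n);   // MDep
  std::memcpy(c, b, n);   // M: its memory dependence is MDep
}

// After: the second copy reads from the original source, exposing the first
// memcpy as dead for a later DSE pass (valid only because a is not modified
// between the two copies, which is what the pointer-dependency check verifies).
void after(char *c, char *b, const char *a, std::size_t n) {
  std::memcpy(b, a, n);
  std::memcpy(c, a, n);
}
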
+
+
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
+
+ // If the source and destination of the memcpy are the same, then zap it.
+ if (M->getSource() == M->getDest()) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
return false;
- else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
- != AliasAnalysis::NoAlias)
+ }
+
+ // If copying from a constant, try to turn the memcpy into a memset.
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ IRBuilder<> Builder(M);
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ M->getAlignment(), false);
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumCpyToSet;
+ return true;
+ }
+
+  // There are two possible optimizations we can do for memcpy:
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
return false;
- // If all checks passed, then we can transform these memcpy's
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- MDep->getRawSource()->getType(),
- M->getLength()->getType() };
- Function *MemCpyFun = Intrinsic::getDeclaration(
- M->getParent()->getParent()->getParent(),
- M->getIntrinsicID(), ArgTys, 3);
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
- Value *Args[5] = {
- M->getRawDest(), MDep->getRawSource(), M->getLength(),
- M->getAlignmentCst(), M->getVolatileCst()
- };
-
- CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
-
-
- // If C and M don't interfere, then this is a valid transformation. If they
- // did, this would mean that the two sources overlap, which would be bad.
- if (MD.getDependency(C) == dep) {
- MD.removeInstruction(M);
- M->eraseFromParent();
- ++NumMemCpyInstr;
- return true;
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
- // Otherwise, there was no point in doing this, so we remove the call we
- // inserted and act like nothing happened.
- MD.removeInstruction(C);
- C->eraseFromParent();
return false;
}
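
One of the new cases above, copying from a constant global whose initializer is a repeated byte, can be pictured with this hypothetical C++ sketch:

#include <cstring>

// A constant global with a definitive, bytewise-uniform initializer.
static const char kZeros[128] = {};

// Before: a memcpy whose source is the constant global.
void before(char *dst) {
  std::memcpy(dst, kZeros, sizeof(kZeros));
}

// After: since every byte of the initializer is the same value, the copy can
// be rewritten as a memset of that byte.
void after(char *dst) {
  std::memset(dst, 0, sizeof(kZeros));
}
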
@@ -726,15 +804,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- // If the memmove is a constant size, use it for the alias query, this allows
- // us to optimize things like: memmove(P, P+64, 64);
- uint64_t MemMoveSize = ~0ULL;
- if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
- MemMoveSize = Len->getZExtValue();
-
// See if the pointers alias.
- if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
- AliasAnalysis::NoAlias)
+ if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
@@ -749,33 +820,107 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
}
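
processMemMove's transformation, sketched at the source level (a hypothetical example): once alias analysis proves the source and destination ranges cannot overlap, the memmove can be demoted to the cheaper memcpy.

#include <cstring>

// Before: a memmove between two buffers. Assume the no-alias fact the pass
// derives from AA: the two ranges are disjoint, so overlap handling is wasted.
void before(char *dst, const char *src, unsigned n) {
  std::memmove(dst, src, n);
}

// After: with no possible overlap, memcpy is equivalent and usually cheaper,
// and it opens the door to the memcpy optimizations above.
void after(char *dst, const char *src, unsigned n) {
  std::memcpy(dst, src, n);
}
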
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ if (TD == 0) return false;
+
+ // Find out what feeds this byval argument.
+ Value *ByValArg = CS.getArgument(ArgNo);
+ const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ MemDepResult DepInfo =
+ MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
+ true, CS.getInstruction(),
+ CS.getInstruction()->getParent());
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, see if we can byval from the source of the memcpy instead of the
+ // result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+  // The length of the memcpy must be at least the size of the byval.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ return false;
+
+  // Get the alignment of the byval. If it is greater than the memcpy's
+  // alignment, then we can't do the substitution. If the call doesn't specify
+  // the alignment, then it is some target-specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+  //   It would be invalid to rewrite the call as foo(*b).
+ //
+ // NOTE: This is conservative, it will stop on any read from the source loc,
+ // not just the defining memcpy.
+ MemDepResult SourceDep =
+ MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
+ false, CS.getInstruction(), MDep->getParent());
+ if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+ return false;
+
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
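
processByValArgument's rewrite, in rough source terms (a hypothetical example; real byval arguments exist at the IR level): a struct is memcpy'd into a local only so the local can be passed by value, and if the copied-from memory is untouched in between, the call can take its argument from the original source instead.

#include <cstring>

struct Big { char bytes[64]; };

void callee(Big b);   // lowered to a byval pointer argument at the IR level

// Before: the frontend materializes a local copy and passes that.
void before(const Big *src) {
  Big tmp;
  std::memcpy(&tmp, src, sizeof(Big));
  callee(tmp);               // the byval copy is made from tmp
}

// After: the byval argument is taken directly from the original source; the
// local and the memcpy become dead (subject to the alignment and
// no-intervening-write checks shown above).
void after(const Big *src) {
  callee(*src);              // the byval copy is made directly from *src
}
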
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
// Walk all instruction in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE;) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
+ bool RepeatInstruction = false;
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
+ else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
+ RepeatInstruction = processMemSet(M, BI);
else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
- MadeChange |= processMemCpy(M);
- else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
- if (processMemMove(M)) {
- --BI; // Reprocess the new memcpy.
- MadeChange = true;
- }
+ RepeatInstruction = processMemCpy(M);
+ else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
+ RepeatInstruction = processMemMove(M);
+ else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ if (BI != BB->begin()) --BI;
+ MadeChange = true;
}
}
}
@@ -788,14 +933,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
//
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
+ TD = getAnalysisIfAvailable<TargetData>();
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
+ MD = 0;
return MadeChange;
}
-
-
-
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index b8afcc1..e093b52 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,9 @@ namespace {
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(ID) {}
+ Reassociate() : FunctionPass(ID) {
+ initializeReassociatePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -104,7 +106,7 @@ namespace {
char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
- "Reassociate expressions", false, false);
+ "Reassociate expressions", false, false)
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
@@ -238,6 +240,12 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
RHS->setOperand(0, LHS);
I->setOperand(0, RHS);
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+ LHS->clearSubclassOptionalData();
+ RHS->clearSubclassOptionalData();
+
++NumLinear;
MadeChange = true;
DEBUG(dbgs() << "Linearized: " << *I << '\n');
@@ -339,6 +347,12 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(0, Ops[i].Op);
I->setOperand(1, Ops[i+1].Op);
+
+ // Clear all the optional flags, which may not hold after the
+ // reassociation if the expression involved more than just this operation.
+ if (Ops.size() != 2)
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -354,6 +368,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (I->getOperand(1) != Ops[i].Op) {
DEBUG(dbgs() << "RA: " << *I << '\n');
I->setOperand(1, Ops[i].Op);
+
+ // Conservatively clear all the optional flags, which may not hold
+ // after the reassociation.
+ I->clearSubclassOptionalData();
+
DEBUG(dbgs() << "TO: " << *I << '\n');
MadeChange = true;
++NumChanged;
@@ -809,16 +828,23 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
SmallVector<Value*, 4> NewMulOps;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ for (unsigned i = 0; i != Ops.size(); ++i) {
// Only try to remove factors from expressions we're allowed to.
BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
continue;
if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
- NewMulOps.push_back(V);
- Ops.erase(Ops.begin()+i);
- --i; --e;
+ // The factorized operand may occur several times. Convert them all in
+ // one fell swoop.
+ for (unsigned j = Ops.size(); j != i;) {
+ --j;
+ if (Ops[j].Op == Ops[i].Op) {
+ NewMulOps.push_back(V);
+ Ops.erase(Ops.begin()+j);
+ }
+ }
+ --i;
}
}
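
The loop change above handles a factored operand that appears more than once among the addends. A hypothetical source-level picture of the factoring OptimizeAdd performs (unsigned arithmetic used so the two forms are exactly equivalent):

// Before: the common factor x appears in several addends, one of them twice.
unsigned before(unsigned x, unsigned y, unsigned z) {
  return x*y + x*z + x*y;
}

// After: every occurrence of the factored term is pulled out in one sweep of
// the operand list (the inner loop above), rather than one occurrence per
// iteration of the outer loop.
unsigned after(unsigned x, unsigned y, unsigned z) {
  return x * (y + z + y);
}
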
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 506b72a..459bb06 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,9 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(ID) {}
+ RegToMem() : FunctionPass(ID) {
+ initializeRegToMemPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,9 +61,11 @@ namespace {
}
char RegToMem::ID = 0;
-INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
- false, false);
-
+INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
+INITIALIZE_PASS_END(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false)
bool RegToMem::runOnFunction(Function &F) {
if (F.isDeclaration())
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index 6115c05..c82e929 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -481,6 +481,19 @@ private:
}
}
+  /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIs
+ /// map for I and PN, but if one is there already, do not create another.
+ /// (Duplicate entries do not break anything directly, but can lead to
+ /// exponential growth of the table in rare cases.)
+ void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
+ std::multimap<PHINode*, Instruction*>::iterator J, E;
+ tie(J, E) = UsersOfOverdefinedPHIs.equal_range(PN);
+ for (; J != E; ++J)
+ if (J->second == I)
+ return;
+ UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
+ }
+
private:
friend class InstVisitor<SCCPSolver>;
@@ -973,9 +986,9 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
if (Result.isConstant()) {
markConstant(IV, &I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
- // operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ // operands.
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1056,8 +1069,8 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markConstant(&I, Result.getConstant());
// Remember that this instruction is virtually using the PHI node
// operands.
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN1, &I));
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN2, &I));
+ InsertInOverdefinedPHIs(&I, PN1);
+ InsertInOverdefinedPHIs(&I, PN2);
return;
}
@@ -1585,22 +1598,20 @@ namespace {
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(ID) {}
+ SCCP() : FunctionPass(ID) {
+ initializeSCCPPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
//
bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- }
};
} // end anonymous namespace
char SCCP::ID = 0;
INITIALIZE_PASS(SCCP, "sccp",
- "Sparse Conditional Constant Propagation", false, false);
+ "Sparse Conditional Constant Propagation", false, false)
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1701,7 +1712,9 @@ namespace {
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(ID) {}
+ IPSCCP() : ModulePass(ID) {
+ initializeIPSCCPPass(*PassRegistry::getPassRegistry());
+ }
bool runOnModule(Module &M);
};
} // end anonymous namespace
@@ -1709,7 +1722,7 @@ namespace {
char IPSCCP::ID = 0;
INITIALIZE_PASS(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
- false, false);
+ false, false)
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index cb03423..bf9ca6d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -7,12 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMScalarOpts.a, which implements
-// several scalar transformations over the LLVM intermediate representation.
+// This file implements common infrastructure for libLLVMScalarOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
//
//===----------------------------------------------------------------------===//
#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetData.h"
@@ -20,6 +23,50 @@
using namespace llvm;
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void llvm::initializeScalarOpts(PassRegistry &Registry) {
+ initializeADCEPass(Registry);
+ initializeBlockPlacementPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeConstantPropagationPass(Registry);
+ initializeCorrelatedValuePropagationPass(Registry);
+ initializeDCEPass(Registry);
+ initializeDeadInstEliminationPass(Registry);
+ initializeDSEPass(Registry);
+ initializeGEPSplitterPass(Registry);
+ initializeGVNPass(Registry);
+ initializeEarlyCSEPass(Registry);
+ initializeIndVarSimplifyPass(Registry);
+ initializeJumpThreadingPass(Registry);
+ initializeLICMPass(Registry);
+ initializeLoopDeletionPass(Registry);
+ initializeLoopInstSimplifyPass(Registry);
+ initializeLoopRotatePass(Registry);
+ initializeLoopStrengthReducePass(Registry);
+ initializeLoopUnrollPass(Registry);
+ initializeLoopUnswitchPass(Registry);
+ initializeLoopIdiomRecognizePass(Registry);
+ initializeLowerAtomicPass(Registry);
+ initializeMemCpyOptPass(Registry);
+ initializeReassociatePass(Registry);
+ initializeRegToMemPass(Registry);
+ initializeSCCPPass(Registry);
+ initializeIPSCCPPass(Registry);
+ initializeSROA_DTPass(Registry);
+ initializeSROA_SSAUpPass(Registry);
+ initializeCFGSimplifyPassPass(Registry);
+ initializeSimplifyHalfPowrLibCallsPass(Registry);
+ initializeSimplifyLibCallsPass(Registry);
+ initializeSinkingPass(Registry);
+ initializeTailDupPass(Registry);
+ initializeTailCallElimPass(Registry);
+}
+
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
+ initializeScalarOpts(*unwrap(R));
+}
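
A minimal sketch of how a C-API client might trigger this initialization. It assumes the LLVMGetGlobalPassRegistry accessor from llvm-c/Core.h is available in this tree (it is not part of this hunk), so treat the exact entry point as an assumption.

#include "llvm-c/Core.h"
#include "llvm-c/Initialization.h"

// Register every ScalarOpts pass with the global registry before building
// pass pipelines through the C bindings.
void initScalarOptsForCBindings(void) {
  LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();  // assumed accessor
  LLVMInitializeScalarOpts(R);
}
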
+
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createAggressiveDCEPass());
}
@@ -56,10 +103,6 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopDeletionPass());
}
-void LLVMAddLoopIndexSplitPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopIndexSplitPass());
-}
-
void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index fee317d..c3ca852 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -31,28 +31,34 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
+STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(ID) {
+ SROA(int T, bool hasDT, char &ID)
+ : FunctionPass(ID), HasDomTree(hasDT) {
if (T == -1)
SRThreshold = 128;
else
@@ -64,17 +70,10 @@ namespace {
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
- // getAnalysisUsage - This pass does not require any passes, but we know it
- // will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
- AU.setPreservesCFG();
- }
-
private:
+ bool HasDomTree;
TargetData *TD;
-
+
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
SmallVector<Value*, 32> DeadInsts;
@@ -83,39 +82,61 @@ namespace {
/// information about the uses. All these fields are initialized to false
/// and set to true when something is learned.
struct AllocaInfo {
+ /// The alloca to promote.
+ AllocaInst *AI;
+
+ /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
+ /// looping and avoid redundant work.
+ SmallPtrSet<PHINode*, 8> CheckedPHIs;
+
/// isUnsafe - This is set to true if the alloca cannot be SROA'd.
bool isUnsafe : 1;
-
+
/// isMemCpySrc - This is true if this aggregate is memcpy'd from.
bool isMemCpySrc : 1;
/// isMemCpyDst - This is true if this aggregate is memcpy'd into.
bool isMemCpyDst : 1;
- AllocaInfo()
- : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
+ /// hasSubelementAccess - This is true if a subelement of the alloca is
+ /// ever accessed, or false if the alloca is only accessed with mem
+ /// intrinsics or load/store that only access the entire alloca at once.
+ bool hasSubelementAccess : 1;
+
+ /// hasALoadOrStore - This is true if there are any loads or stores to it.
+ /// The alloca may just be accessed with memcpy, for example, which would
+ /// not set this.
+ bool hasALoadOrStore : 1;
+
+ explicit AllocaInfo(AllocaInst *ai)
+ : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
+ hasSubelementAccess(false), hasALoadOrStore(false) {}
};
-
+
unsigned SRThreshold;
- void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }
+ void MarkUnsafe(AllocaInfo &I, Instruction *User) {
+ I.isUnsafe = true;
+ DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
+ }
bool isSafeAllocaToScalarRepl(AllocaInst *AI);
- void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
- AllocaInfo &Info);
- void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
- AllocaInfo &Info);
- void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
- const Type *MemOpType, bool isStore, AllocaInfo &Info);
+ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
+ void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
+ AllocaInfo &Info);
+ void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
+ void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
+ const Type *MemOpType, bool isStore, AllocaInfo &Info,
+ Instruction *TheAccess, bool AllowWholeAccess);
bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
const Type *&IdxTy);
-
- void DoScalarReplacement(AllocaInst *AI,
+
+ void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -129,18 +150,63 @@ namespace {
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
-
+
static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
};
+
+ // SROA_DT - SROA that uses DominatorTree.
+ struct SROA_DT : public SROA {
+ static char ID;
+ public:
+ SROA_DT(int T = -1) : SROA(T, true, ID) {
+ initializeSROA_DTPass(*PassRegistry::getPassRegistry());
+ }
+
+    // getAnalysisUsage - This pass requires DominatorTree, and we know it
+    // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+ }
+ };
+
+ // SROA_SSAUp - SROA that uses SSAUpdater.
+ struct SROA_SSAUp : public SROA {
+ static char ID;
+ public:
+ SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
+ initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
+ }
+
+ // getAnalysisUsage - This pass does not require any passes, but we know it
+ // will not alter the CFG, so say so.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+ };
+
}
-char SROA::ID = 0;
-INITIALIZE_PASS(SROA, "scalarrepl",
- "Scalar Replacement of Aggregates", false, false);
+char SROA_DT::ID = 0;
+char SROA_SSAUp::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
+ "Scalar Replacement of Aggregates (DT)", false, false)
+
+INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
+INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
+ "Scalar Replacement of Aggregates (SSAUp)", false, false)
// Public interface to the ScalarReplAggregates pass
-FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
- return new SROA(Threshold);
+FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
+ bool UseDomTree) {
+ if (UseDomTree)
+ return new SROA_DT(Threshold);
+ return new SROA_SSAUp(Threshold);
}
@@ -156,16 +222,16 @@ class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered.
unsigned AllocaSize;
const TargetData &TD;
-
+
/// IsNotTrivial - This is set to true if there is some access to the object
/// which means that mem2reg can't promote it.
bool IsNotTrivial;
-
+
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
const Type *VectorTy;
-
+
/// HadAVector - True if there is at least one vector access to the alloca.
/// We don't want to turn random arrays into vectors and use vector element
/// insert/extract, but if there are element accesses to something that is
@@ -179,14 +245,14 @@ public:
VectorTy = 0;
HadAVector = false;
}
-
+
AllocaInst *TryConvert(AllocaInst *AI);
-
+
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
void MergeInType(const Type *In, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
-
+
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder);
Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
@@ -195,26 +261,6 @@ private:
} // end anonymous namespace.
-/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
-/// allowed to form. We do this to avoid MMX types, which is a complete hack,
-/// but is required until the backend is fixed.
-static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
- StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
- if (!Triple.startswith("i386") &&
- !Triple.startswith("x86_64"))
- return false;
-
- // Reject all the MMX vector types.
- switch (VTy->getNumElements()) {
- default: return false;
- case 1: return VTy->getElementType()->isIntegerTy(64);
- case 2: return VTy->getElementType()->isIntegerTy(32);
- case 4: return VTy->getElementType()->isIntegerTy(16);
- case 8: return VTy->getElementType()->isIntegerTy(8);
- }
-}
-
-
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -223,7 +269,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// out.
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
return 0;
-
+
// If we were able to find a vector type that can handle this with
// insert/extract elements, and if there was at least one use that had
// a vector type, promote this to a vector. We don't want to promote
@@ -231,8 +277,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
- !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -263,7 +308,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
return;
-
+
// If this could be contributing to a vector, analyze it.
// If the In type is a vector that is the same size as the alloca, see if it
@@ -271,7 +316,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
// Remember if we saw a vector type.
HadAVector = true;
-
+
if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
// If we're storing/loading a vector of the right size, allow it as a
// vector. If this the first vector we see, remember the type so that
@@ -290,7 +335,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// compatible with it.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
+ (VectorTy == 0 ||
cast<VectorType>(VectorTy)->getElementType()
->getPrimitiveSizeInBits()/8 == EltSize)) {
if (VectorTy == 0)
@@ -298,7 +343,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
return;
}
}
-
+
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
VectorTy = Type::getVoidTy(In->getContext());
@@ -316,22 +361,28 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// Don't break volatile loads.
if (LI->isVolatile())
return false;
+ // Don't touch MMX operations.
+ if (LI->getType()->isX86_MMXTy())
+ return false;
MergeInType(LI->getType(), Offset);
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || SI->isVolatile()) return false;
+ // Don't touch MMX operations.
+ if (SI->getOperand(0)->getType()->isX86_MMXTy())
+ return false;
MergeInType(SI->getOperand(0)->getType(), Offset);
continue;
}
-
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset))
@@ -343,7 +394,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a GEP with a variable indices, we can't handle it.
if (!GEP->hasAllConstantIndices())
return false;
-
+
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
@@ -372,15 +423,15 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
-
+
IsNotTrivial = true; // Can't be mem2reg'd.
continue;
}
-
+
// Otherwise, we cannot handle this!
return false;
}
-
+
return true;
}
@@ -411,9 +462,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
GEP->eraseFromParent();
continue;
}
-
- IRBuilder<> Builder(User->getParent(), User);
-
+
+ IRBuilder<> Builder(User);
+
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
// The load is a bit extract from NewAI shifted right by Offset bits.
Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
@@ -423,7 +474,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LI->eraseFromParent();
continue;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
@@ -431,14 +482,14 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Builder);
Builder.CreateStore(New, NewAI);
SI->eraseFromParent();
-
+
// If the load we just inserted is now dead, then the inserted store
// overwrote the entire thing.
if (Old->use_empty())
Old->eraseFromParent();
continue;
}
-
+
// If this is a constant sized memset of a constant value (e.g. 0) we can
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
@@ -446,7 +497,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
if (NumBytes != 0) {
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
-
+
// Compute the value replicated the right number of times.
APInt APVal(NumBytes*8, Val);
@@ -454,17 +505,17 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (Val)
for (unsigned i = 1; i != NumBytes; ++i)
APVal |= APVal << 8;
-
+
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
Builder.CreateStore(New, NewAI);
-
+
// If the load we just inserted is now dead, then the memset overwrote
// the entire thing.
if (Old->use_empty())
- Old->eraseFromParent();
+ Old->eraseFromParent();
}
MSI->eraseFromParent();
continue;
@@ -474,29 +525,42 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// can handle it like a load or store of the scalar type.
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
assert(Offset == 0 && "must be store to start of alloca");
-
+
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));
-
- if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+
+ if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
Value *SrcPtr = MTI->getSource();
- SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
-
+ const PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ SPTy->getAddressSpace());
+ }
+ SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);
+
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
- Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
+ const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
+ const PointerType* AIPTy = cast<PointerType>(NewAI->getType());
+ if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
+ AIPTy = PointerType::get(AIPTy->getElementType(),
+ DPTy->getAddressSpace());
+ }
+ Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);
+
StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
NewStore->setAlignment(MTI->getAlignment());
} else {
@@ -506,7 +570,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
MTI->eraseFromParent();
continue;
}
-
+
llvm_unreachable("Unsupported operation!");
}
}
@@ -548,7 +612,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
V = Builder.CreateBitCast(V, ToType, "tmp");
return V;
}
-
+
// If ToType is a first class aggregate, extract out each of the pieces and
// use insertvalue's to form the FCA.
if (const StructType *ST = dyn_cast<StructType>(ToType)) {
@@ -562,7 +626,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
}
return Res;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
@@ -598,7 +662,7 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
ConstantInt::get(FromVal->getType(),
ShAmt), "tmp");
else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
- FromVal = Builder.CreateShl(FromVal,
+ FromVal = Builder.CreateShl(FromVal,
ConstantInt::get(FromVal->getType(),
-ShAmt), "tmp");
@@ -606,11 +670,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
- Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
else if (LIBitWidth > NTy->getBitWidth())
FromVal =
- Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
+ Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
@@ -647,7 +711,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
-
+
// Changing the whole vector with memset or with an access of a different
// vector type?
if (ValSize == VecSize)
@@ -657,28 +721,28 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Must be an element insertion.
unsigned Elt = Offset/EltSize;
-
+
if (SV->getType() != VTy->getElementType())
SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-
- SV = Builder.CreateInsertElement(Old, SV,
+
+ SV = Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
return SV;
}
-
+
// If SV is a first-class aggregate value, insert each value recursively.
if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
const StructLayout &Layout = *TD.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
- Old = ConvertScalar_InsertValue(Elt, Old,
+ Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
Builder);
}
return Old;
}
-
+
if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
@@ -778,16 +842,298 @@ bool SROA::runOnFunction(Function &F) {
return Changed;
}
+namespace {
+class AllocaPromoter : public LoadAndStorePromoter {
+ AllocaInst *AI;
+public:
+ AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
+ : LoadAndStorePromoter(Insts, S), AI(0) {}
+
+ void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
+ // Remember which alloca we're promoting (for isInstInList).
+ this->AI = AI;
+ LoadAndStorePromoter::run(Insts);
+ AI->eraseFromParent();
+ }
+
+ virtual bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getOperand(0) == AI;
+ return cast<StoreInst>(I)->getPointerOperand() == AI;
+ }
+};
+} // end anon namespace
+
+/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers and then
+/// select between the results, allowing the load of the alloca to be promoted.
+/// From this:
+/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// %V2 = load i32* %Other
+/// %V = select i1 %cond, i32 %V1, i32 %V2
+///
+/// We can do this to a select if its only uses are loads and if the operands
+/// to the select can be loaded unconditionally.
+static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // Both operands to the select need to be dereferencable, either absolutely
+ // (e.g. allocas) or at this point because we can see other accesses to it.
+ if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment(), TD))
+ return false;
+ }
+
+ return true;
+}
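
A hypothetical source-level picture of the select rewrite described above: the load of the selected pointer is replaced by loading both candidates and selecting between the loaded values, which leaves the alloca used only by direct loads and stores.

// Before: the alloca's address escapes into a select, blocking promotion.
int before(bool cond, int *other) {
  int local = 42;                      // the alloca we want to promote
  int *p = cond ? &local : other;      // select of pointers
  return *p;                           // single load of the select
}

// After: both pointers are loaded and the select operates on values. Loading
// *other unconditionally is exactly the speculation this predicate has to
// prove safe (both operands must be dereferenceable at this point).
int after(bool cond, int *other) {
  int local = 42;
  int v1 = local;                      // load of the alloca
  int v2 = *other;                     // load of the other pointer
  return cond ? v1 : v2;               // select of the loaded values
}
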
+
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+/// %P2 = phi [i32* %Alloca, i32* %Other]
+/// %V = load i32* %P2
+/// to:
+/// %V1 = load i32* %Alloca -> will be mem2reg'd
+/// ...
+/// %V2 = load i32* %Other
+/// ...
+/// %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a PHI if its only uses are loads and if the incoming
+/// pointers can be loaded safely in the predecessor blocks.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+ // For now, we can only do this promotion if the load is in the same block as
+ // the PHI, and if there are no stores between the phi and load.
+ // TODO: Allow recursive phi users.
+ // TODO: Allow stores.
+ BasicBlock *BB = PN->getParent();
+ unsigned MaxAlign = 0;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE; ++UI) {
+ LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ if (LI == 0 || LI->isVolatile()) return false;
+
+ // For now we only allow loads in the same block as the PHI. This is a
+ // common case that happens when instcombine merges two loads through a PHI.
+ if (LI->getParent() != BB) return false;
+
+ // Ensure that there are no instructions between the PHI and the load that
+ // could store.
+ for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+ if (BBI->mayWriteToMemory())
+ return false;
+
+ MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ }
+
+ // Okay, we know that we have one or more loads in the same block as the PHI.
+ // We can transform this if it is safe to push the loads into the predecessor
+ // blocks. The only thing to watch out for is that we can't put a possibly
+ // trapping load in the predecessor if it is a critical edge.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+
+ // If the predecessor has a single successor, then the edge isn't critical.
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ Value *InVal = PN->getIncomingValue(i);
+
+ // If the InVal is an invoke in the pred, we can't put a load on the edge.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+ if (II->getParent() == Pred)
+ return false;
+
+ // If this pointer is always safe to load, or if we can prove that there is
+ // already a load in the block, then we can move the load to the pred block.
+ if (InVal->isDereferenceablePointer() ||
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
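
The analogous PHI case, again as a hypothetical source-level sketch: the load of the merged pointer is pushed into the predecessor blocks, so each block loads its own pointer and the join merges values instead of addresses.

// Before: the alloca's address flows through a PHI at the if/else join,
// blocking promotion.
int before(bool cond, int *other) {
  int local = 7;
  int *p;
  if (cond)
    p = &local;
  else
    p = other;          // PHI of &local and other at the join point
  return *p;            // single load of the PHI
}

// After: each predecessor loads its own pointer and the PHI merges the loaded
// values. This is safe only because each incoming pointer was shown to be
// loadable in its predecessor block, which is what the checks above verify.
int after(bool cond, int *other) {
  int local = 7;
  int v;
  if (cond)
    v = local;          // load of the alloca in its predecessor
  else
    v = *other;         // load of the other pointer in its predecessor
  return v;             // PHI of the loaded values
}
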
+
+
+/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
+/// direct (non-volatile) loads and stores to it. If the alloca is close but
+/// not quite there, this will transform the code to allow promotion. As such,
+/// it is a non-pure predicate.
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
+ SetVector<Instruction*, SmallVector<Instruction*, 4>,
+ SmallPtrSet<Instruction*, 4> > InstsToRewrite;
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LI->isVolatile())
+ return false;
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI || SI->isVolatile())
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ continue;
+ }
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
+      // If the condition being selected on is a constant, fold the select; yes,
+      // this does (rarely) happen early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
+ Value *Result = SI->getOperand(1+CI->isZero());
+ SI->replaceAllUsesWith(Result);
+ SI->eraseFromParent();
+
+        // This is very rare and we just scrambled the use list of AI, so start
+        // over completely.
+ return tryToMakeAllocaBePromotable(AI, TD);
+ }
+
+ // If it is safe to turn "load (select c, AI, ptr)" into a select of two
+ // loads, then we can transform this by rewriting the select.
+ if (!isSafeSelectToSpeculate(SI, TD))
+ return false;
+
+ InstsToRewrite.insert(SI);
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(U)) {
+ if (PN->use_empty()) { // Dead PHIs can be stripped.
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+ // in the pred blocks, then we can transform this by rewriting the PHI.
+ if (!isSafePHIToSpeculate(PN, TD))
+ return false;
+
+ InstsToRewrite.insert(PN);
+ continue;
+ }
+
+ return false;
+ }
+
+ // If there are no instructions to rewrite, then all uses are load/stores and
+ // we're done!
+ if (InstsToRewrite.empty())
+ return true;
+
+  // If we have instructions that need to be rewritten for this to be
+  // promotable, take care of them now.
+ for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+ // Selects in InstsToRewrite only have load uses. Rewrite each as two
+ // loads with a new select.
+ while (!SI->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(SI->use_back());
+
+ IRBuilder<> Builder(LI);
+ LoadInst *TrueLoad =
+ Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+ LoadInst *FalseLoad =
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
+
+ // Transfer alignment and TBAA info if present.
+ TrueLoad->setAlignment(LI->getAlignment());
+ FalseLoad->setAlignment(LI->getAlignment());
+ if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+ TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+ }
+
+ Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+ V->takeName(LI);
+ LI->replaceAllUsesWith(V);
+ LI->eraseFromParent();
+ }
+
+ // Now that all the loads are gone, the select is gone too.
+ SI->eraseFromParent();
+ continue;
+ }
+
+ // Otherwise, we have a PHI node which allows us to push the loads into the
+ // predecessors.
+ PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+ if (PN->use_empty()) {
+ PN->eraseFromParent();
+ continue;
+ }
+
+ const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+
+ // Get the TBAA tag and alignment to use from one of the loads. It doesn't
+ // matter which one we get and if any differ, it doesn't matter.
+ LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+ unsigned Align = SomeLoad->getAlignment();
+
+ // Rewrite all loads of the PN to use the new PHI.
+ while (!PN->use_empty()) {
+ LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LI->replaceAllUsesWith(NewPN);
+ LI->eraseFromParent();
+ }
+
+ // Inject loads into all of the pred blocks. Keep track of which blocks we
+ // insert them into in case we have multiple edges from the same block.
+ DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ LoadInst *&Load = InsertedLoads[Pred];
+ if (Load == 0) {
+ Load = new LoadInst(PN->getIncomingValue(i),
+ PN->getName() + "." + Pred->getName(),
+ Pred->getTerminator());
+ Load->setAlignment(Align);
+ if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+ }
+
+ NewPN->addIncoming(Load, Pred);
+ }
+
+ PN->eraseFromParent();
+ }
+
+ ++NumAdjusted;
+ return true;
+}
+
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+ DominatorTree *DT = 0;
+ if (HasDomTree)
+ DT = &getAnalysis<DominatorTree>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
bool Changed = false;
-
+ SmallVector<Instruction*, 64> Insts;
while (1) {
Allocas.clear();
@@ -795,12 +1141,27 @@ bool SROA::performPromotion(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI))
+ if (tryToMakeAllocaBePromotable(AI, TD))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ if (HasDomTree)
+ PromoteMemToReg(Allocas, *DT);
+ else {
+ SSAUpdater SSA;
+ for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+ AllocaInst *AI = Allocas[i];
+
+ // Build list of instructions to promote.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E; ++UI)
+ Insts.push_back(cast<Instruction>(*UI));
+
+ AllocaPromoter(Insts, SSA).run(AI, Insts);
+ Insts.clear();
+ }
+ }
NumPromoted += Allocas.size();
Changed = true;
}
@@ -842,7 +1203,7 @@ bool SROA::performScalarRepl(Function &F) {
while (!WorkList.empty()) {
AllocaInst *AI = WorkList.back();
WorkList.pop_back();
-
+
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// with unused elements.
if (AI->use_empty()) {
@@ -854,7 +1215,7 @@ bool SROA::performScalarRepl(Function &F) {
// If this alloca is impossible for us to promote, reject it early.
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
-
+
// Check to see if this allocation is only modified by a memcpy/memmove from
// a constant global. If this is the case, we can change all users to use
// the constant global instead. This is commonly produced by the CFE by
@@ -871,7 +1232,7 @@ bool SROA::performScalarRepl(Function &F) {
Changed = true;
continue;
}
-
+
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -880,10 +1241,10 @@ bool SROA::performScalarRepl(Function &F) {
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
-
+
// Do not promote any struct whose size is too big.
if (AllocaSize > SRThreshold) continue;
-
+
// If the alloca looks like a good candidate for scalar replacement, and if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
@@ -906,8 +1267,8 @@ bool SROA::performScalarRepl(Function &F) {
++NumConverted;
Changed = true;
continue;
- }
-
+ }
+
// Otherwise, couldn't process this alloca.
}
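A minimal source-level sketch of the overall transformation (hypothetical code, assuming the aggregate passes all of the checks above):

    struct Pair { int a; int b; };
    int sum() {
      struct Pair p;          // one alloca of an aggregate type
      p.a = 1;                // scalar replacement splits it into two
      p.b = 2;                // independent int allocas, which promotion
      return p.a + p.b;       // then turns into plain SSA values
    }
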
@@ -916,14 +1277,14 @@ bool SROA::performScalarRepl(Function &F) {
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
-void SROA::DoScalarReplacement(AllocaInst *AI,
+void SROA::DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList) {
DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
SmallVector<AllocaInst*, 32> ElementAllocas;
if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -971,48 +1332,106 @@ void SROA::DeleteDeadInstructions() {
I->eraseFromParent();
}
}
-
+
/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI. The results are flagged in
/// the Info parameter. Offset indicates the position within AI that is
/// referenced by this instruction.
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
+void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
- isSafeForScalarRepl(BC, AI, Offset, Info);
+ isSafeForScalarRepl(BC, Offset, Info);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
uint64_t GEPOffset = Offset;
- isSafeGEP(GEPI, AI, GEPOffset, Info);
+ isSafeGEP(GEPI, GEPOffset, Info);
if (!Info.isUnsafe)
- isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
+ isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length)
- isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info);
- else
- MarkUnsafe(Info);
+ if (Length == 0)
+ return MarkUnsafe(Info, User);
+ isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ UI.getOperandNo() == 0, Info, MI,
+ true /*AllowWholeAccess*/);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Store is ok if storing INTO the pointer, not storing the pointer
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else {
+ return MarkUnsafe(Info, User);
+ }
+ if (Info.isUnsafe) return;
+ }
+}
+
+
+/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
+/// pointer derived from the alloca, we can often still split the alloca into
+/// elements. This is useful if we have a large alloca where one element is
+/// phi'd together somewhere: we can SRoA and promote all the other elements
+/// even if we end up not being able to promote this one.
+///
+/// All we require is that the uses of the PHI do not index into other parts of
+/// the alloca. The most important use case for this is single loads and
+/// stores that are PHI'd together, which can happen due to code sinking.
+void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
+ AllocaInfo &Info) {
+ // If we've already checked this PHI, don't do it again.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ if (!Info.CheckedPHIs.insert(PN))
+ return;
+
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ isSafePHISelectUseForScalarRepl(BC, Offset, Info);
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ // Only allow "bitcast" GEPs for simplicity. We could generalize this,
+ // but would have to prove that we're staying inside of an element being
+ // promoted.
+ if (!GEPI->hasAllZeroIndices())
+ return MarkUnsafe(Info, User);
+ isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- if (!LI->isVolatile()) {
- const Type *LIType = LI->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
- LIType, false, Info);
- } else
- MarkUnsafe(Info);
+ if (LI->isVolatile())
+ return MarkUnsafe(Info, User);
+ const Type *LIType = LI->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Store is ok if storing INTO the pointer, not storing the pointer
- if (!SI->isVolatile() && SI->getOperand(0) != I) {
- const Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
- SIType, true, Info);
- } else
- MarkUnsafe(Info);
+ if (SI->isVolatile() || SI->getOperand(0) == I)
+ return MarkUnsafe(Info, User);
+
+ const Type *SIType = SI->getOperand(0)->getType();
+ isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ Info.hasALoadOrStore = true;
+ } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
+ isSafePHISelectUseForScalarRepl(User, Offset, Info);
} else {
- DEBUG(errs() << " Transformation preventing inst: " << *User << '\n');
- MarkUnsafe(Info);
+ return MarkUnsafe(Info, User);
}
if (Info.isUnsafe) return;
}
@@ -1023,7 +1442,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
/// references, and when the resulting offset corresponds to an element within
/// the alloca type. The results are flagged in the Info parameter. Upon
/// return, Offset is adjusted as specified by the GEP indices.
-void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
+void SROA::isSafeGEP(GetElementPtrInst *GEPI,
uint64_t &Offset, AllocaInfo &Info) {
gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
if (GEPIt == E)
@@ -1038,7 +1457,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
if (!IdxVal)
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
@@ -1046,40 +1465,92 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
&Indices[0], Indices.size());
- if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
- MarkUnsafe(Info);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
+ MarkUnsafe(Info, GEPI);
+}
+
+/// isHomogeneousAggregate - Check if type T is a struct or array containing
+/// elements of the same type (which is always true for arrays). If so,
+/// return true with NumElts and EltTy set to the number of elements and the
+/// element type, respectively.
+static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts,
+ const Type *&EltTy) {
+ if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
+ NumElts = AT->getNumElements();
+ EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ return true;
+ }
+ if (const StructType *ST = dyn_cast<StructType>(T)) {
+ NumElts = ST->getNumContainedTypes();
+ EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ for (unsigned n = 1; n < NumElts; ++n) {
+ if (ST->getContainedType(n) != EltTy)
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
+/// "homogeneous" aggregates with the same element type and number of elements.
+static bool isCompatibleAggregate(const Type *T1, const Type *T2) {
+ if (T1 == T2)
+ return true;
+
+ unsigned NumElts1, NumElts2;
+ const Type *EltTy1, *EltTy2;
+ if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
+ isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
+ NumElts1 == NumElts2 &&
+ EltTy1 == EltTy2)
+ return true;
+
+ return false;
}
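For illustration (not from the patch), two types that isCompatibleAggregate would treat as interchangeable, assuming the usual lowering of these C++ types:

    struct V2 { float x; float y; };   // lowers to { float, float }
    typedef float F2[2];               // lowers to [2 x float]
    // Both are homogeneous aggregates of two floats, so a load or store of
    // one type over an alloca of the other still counts as a whole-alloca
    // access that can be rewritten element by element.
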
/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it. The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
-void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
+///
+/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
+/// unit. If false, it only allows accesses known to be in a single element.
+void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
const Type *MemOpType, bool isStore,
- AllocaInfo &Info) {
+ AllocaInfo &Info, Instruction *TheAccess,
+ bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
- if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
- bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
- // This is safe for MemIntrinsics (where MemOpType is 0), integer types
- // (which are essentially the same as the MemIntrinsics, especially with
- // regard to copying padding between elements), or references using the
- // aggregate type of the alloca.
- if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
- if (!UsesAggregateType) {
- if (isStore)
- Info.isMemCpyDst = true;
- else
- Info.isMemCpySrc = true;
- }
+ if (Offset == 0 && AllowWholeAccess &&
+ MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
+ // loads/stores (which are essentially the same as the MemIntrinsics with
+ // regard to copying padding between elements). But, if an alloca is
+ // flagged as both a source and destination of such operations, we'll need
+ // to check later for padding between elements.
+ if (!MemOpType || MemOpType->isIntegerTy()) {
+ if (isStore)
+ Info.isMemCpyDst = true;
+ else
+ Info.isMemCpySrc = true;
+ return;
+ }
+ // This is also safe for references using a type that is compatible with
+ // the type of the alloca, so that loads/stores can be rewritten using
+ // insertvalue/extractvalue.
+ if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
+ Info.hasSubelementAccess = true;
return;
}
}
// Check if the offset/size correspond to a component within the alloca type.
- const Type *T = AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize))
+ const Type *T = Info.AI->getAllocatedType();
+ if (TypeHasComponent(T, Offset, MemSize)) {
+ Info.hasSubelementAccess = true;
return;
+ }
- return MarkUnsafe(Info);
+ return MarkUnsafe(Info, TheAccess);
}
/// TypeHasComponent - Return true if T has a component type with the
@@ -1116,14 +1587,21 @@ bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI++);
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ continue;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
RewriteGEP(GEPI, AI, Offset, NewElts);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
+ continue;
+ }
+
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
@@ -1131,9 +1609,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ continue;
+ }
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
const Type *LIType = LI->getType();
- if (LIType == AI->getAllocatedType()) {
+
+ if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
// Replace:
// %res = load { i32, i32 }* %alloc
// with:
@@ -1155,10 +1637,13 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
Value *Val = SI->getOperand(0);
const Type *SIType = Val->getType();
- if (SIType == AI->getAllocatedType()) {
+ if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
// Replace:
// store { i32, i32 } %val, { i32, i32 }* %alloc
// with:
@@ -1178,6 +1663,26 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
+ continue;
+ }
+
+ if (isa<SelectInst>(User) || isa<PHINode>(User)) {
+ // If we have a PHI user of the alloca itself (as opposed to a GEP or
+ // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to
+ // the new pointer.
+ if (!isa<AllocaInst>(I)) continue;
+
+ assert(Offset == 0 && NewElts[0] &&
+ "Direct alloca use should have a zero offset");
+
+ // If we have a use of the alloca, we know the derived uses will be
+ // utilizing just the first element of the scalarized result. Insert a
+ // bitcast of the first alloca before the user as required.
+ AllocaInst *NewAI = NewElts[0];
+ BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
+ NewAI->moveBefore(BCI);
+ TheUse = BCI;
+ continue;
}
}
}
@@ -1305,7 +1810,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// function is only called for mem intrinsics that access the whole
// aggregate, so non-zero GEPs are not an issue here.)
OtherPtr = OtherPtr->stripPointerCasts();
-
+
// Copying the alloca to itself is a no-op: just delete it.
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
@@ -1316,28 +1821,26 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
DeadInsts.push_back(MI);
return;
}
-
+
// If the pointer is not the right type, insert a bitcast to the right
// type.
const Type *NewTy =
PointerType::get(AI->getType()->getElementType(), AddrSpace);
-
+
if (OtherPtr->getType() != NewTy)
OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
}
-
+
// Process each element of the aggregate.
- Value *TheFn = MI->getCalledValue();
- const Type *BytePtrTy = MI->getRawDest()->getType();
bool SROADest = MI->getRawDest() == Inst;
-
+
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
Value *OtherElt = 0;
unsigned OtherEltAlign = MemAlignment;
-
+
if (OtherPtr) {
Value *Idx[2] = { Zero,
ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
@@ -1353,7 +1856,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
EltOffset = TD->getTypeAllocSize(EltTy)*i;
}
-
+
// The alignment of the other pointer is the guaranteed alignment of the
// element, which is affected by both the known alignment of the whole
// mem intrinsic and the alignment of the element. If the alignment of
@@ -1361,10 +1864,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// known alignment is just 4 bytes.
OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
}
-
+
Value *EltPtr = NewElts[i];
const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
-
+
// If we got down to a scalar, insert a load or store as appropriate.
if (EltTy->isSingleValueType()) {
if (isa<MemTransferInst>(MI)) {
@@ -1380,7 +1883,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
continue;
}
assert(isa<MemSetInst>(MI));
-
+
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
@@ -1400,7 +1903,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
TotalVal = TotalVal.shl(8);
TotalVal |= OneVal;
}
-
+
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
if (ValTy->isPointerTy())
@@ -1408,12 +1911,12 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
assert(StoreVal->getType() == ValTy && "Type mismatch!");
-
+
// If the requested value was a vector constant, create it.
if (EltTy != ValTy) {
unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(&Elts[0], NumElts);
+ StoreVal = ConstantVector::get(Elts);
}
}
new StoreInst(StoreVal, EltPtr, MI);
@@ -1422,55 +1925,24 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// Otherwise, if we're storing a byte variable, use a memset call for
// this element.
}
-
- // Cast the element pointer to BytePtrTy.
- if (EltPtr->getType() != BytePtrTy)
- EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
-
- // Cast the other pointer (if we have one) to BytePtrTy.
- if (OtherElt && OtherElt->getType() != BytePtrTy) {
- // Preserve address space of OtherElt
- const PointerType* OtherPTy = cast<PointerType>(OtherElt->getType());
- const PointerType* PTy = cast<PointerType>(BytePtrTy);
- if (OtherPTy->getElementType() != PTy->getElementType()) {
- Type *NewOtherPTy = PointerType::get(PTy->getElementType(),
- OtherPTy->getAddressSpace());
- OtherElt = new BitCastInst(OtherElt, NewOtherPTy,
- OtherElt->getNameStr(), MI);
- }
- }
-
+
unsigned EltSize = TD->getTypeAllocSize(EltTy);
-
+
+ IRBuilder<> Builder(MI);
+
// Finally, insert the meminst for this element.
- if (isa<MemTransferInst>(MI)) {
- Value *Ops[] = {
- SROADest ? EltPtr : OtherElt, // Dest ptr
- SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- // Align
- ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
- MI->getVolatileCst()
- };
- // In case we fold the address space overloaded memcpy of A to B
- // with memcpy of B to C, change the function to be a memcpy of A to C.
- const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(),
- Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ if (isa<MemSetInst>(MI)) {
+ Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
+ MI->isVolatile());
} else {
- assert(isa<MemSetInst>(MI));
- Value *Ops[] = {
- EltPtr, MI->getArgOperand(1), // Dest, Value,
- ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
- Zero, // Align
- ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
- };
- const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };
- Module *M = MI->getParent()->getParent()->getParent();
- TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
- CallInst::Create(TheFn, Ops, Ops + 5, "", MI);
+ assert(isa<MemTransferInst>(MI));
+ Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr
+ Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr
+
+ if (isa<MemCpyInst>(MI))
+ Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
+ else
+ Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
}
}
DeadInsts.push_back(MI);
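Roughly what the per-element rewrite above does, in source-level terms (hypothetical example; the exact lowering depends on the element types):

    #include <cstring>
    struct S { int a; int b; };
    void copy(S *dst, const S *src) {
      std::memcpy(dst, src, sizeof(S));  // the whole-aggregate memcpy becomes
    }                                    // one access per element, roughly
    // dst->a = src->a; dst->b = src->b;  Single-value elements get a plain
    // load/store; non-scalar elements get a smaller element-sized memcpy at
    // the element's own alignment.
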
@@ -1486,12 +1958,13 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *SrcVal = SI->getOperand(0);
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+
+ IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
- SrcVal = new ZExtInst(SrcVal,
- IntegerType::get(SI->getContext(), AllocaSizeBits),
- "", SI);
+ SrcVal = Builder.CreateZExt(SrcVal,
+ IntegerType::get(SI->getContext(), AllocaSizeBits));
DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
<< '\n');
@@ -1500,47 +1973,44 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// have different ways to compute the element offset.
if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
const StructLayout *Layout = TD->getStructLayout(EltSTy);
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
const Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
+
if (FieldSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(), FieldSizeBits),
- "", SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(), FieldSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
} else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, FieldTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
}
-
+
} else {
const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
const Type *ArrayEltTy = ATy->getElementType();
@@ -1548,50 +2018,48 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
- else
+ else
Shift = 0;
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Ignore zero sized fields like {}, they obviously contain no data.
if (ElementSizeBits == 0) continue;
-
+
Value *EltVal = SrcVal;
if (Shift) {
Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
- EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
- "sroa.store.elt", SI);
+ EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
}
-
+
// Truncate down to an integer of the right size.
if (ElementSizeBits != AllocaSizeBits)
- EltVal = new TruncInst(EltVal,
- IntegerType::get(SI->getContext(),
- ElementSizeBits),"",SI);
+ EltVal = Builder.CreateTrunc(EltVal,
+ IntegerType::get(SI->getContext(),
+ ElementSizeBits));
Value *DestField = NewElts[i];
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
} else if (ArrayEltTy->isFloatingPointTy() ||
ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
- EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
+ EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
} else {
// Otherwise, bitcast the dest pointer (for aggregates).
- DestField = new BitCastInst(DestField,
- PointerType::getUnqual(EltVal->getType()),
- "", SI);
+ DestField = Builder.CreateBitCast(DestField,
+ PointerType::getUnqual(EltVal->getType()));
}
new StoreInst(EltVal, DestField, SI);
-
+
if (TD->isBigEndian())
Shift -= ElementOffset;
- else
+ else
Shift += ElementOffset;
}
}
-
+
DeadInsts.push_back(SI);
}
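A worked example of the shift/truncate logic above for the little-endian case (hypothetical helper; big-endian targets mirror the shifts from the high end of the value, as the Shift computation shows):

    #include <stdint.h>
    // Storing a 64-bit integer into an alloca of type { i32, i32 }:
    void split_store(uint64_t v, uint32_t *elt0, uint32_t *elt1) {
      *elt0 = (uint32_t) v;          // field at bit offset 0:  trunc(v)
      *elt1 = (uint32_t)(v >> 32);   // field at bit offset 32: trunc(lshr(v, 32))
    }
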
@@ -1603,10 +2071,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// and form the result value.
const Type *AllocaEltTy = AI->getAllocatedType();
uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
-
+
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
-
+
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
const StructLayout *Layout = 0;
@@ -1616,11 +2084,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
} else {
const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
- }
-
- Value *ResultVal =
+ }
+
+ Value *ResultVal =
Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
-
+
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Load the value from the alloca. If the NewElt is an aggregate, cast
// the pointer to an integer of the same size before doing the load.
@@ -1628,11 +2096,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
-
+
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
-
- const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
+
+ const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
!FieldTy->isVectorTy())
@@ -1650,17 +2118,17 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// we can shift and insert it.
if (SrcField->getType() != ResultVal->getType())
SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
-
+
// Determine the number of bits to shift SrcField.
uint64_t Shift;
if (Layout) // Struct case.
Shift = Layout->getElementOffsetInBits(i);
else // Array case.
Shift = i*ArrayEltBitOffset;
-
+
if (TD->isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
-
+
if (Shift) {
Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
@@ -1683,46 +2151,39 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
/// HasPadding - Return true if the specified type has any structure or
-/// alignment padding, false otherwise.
+/// alignment padding in between the elements that would be split apart
+/// by SROA; return false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
- return HasPadding(ATy->getElementType(), TD);
-
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return HasPadding(VTy->getElementType(), TD);
-
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TD.getStructLayout(STy);
- unsigned PrevFieldBitOffset = 0;
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
-
- // Padding in sub-elements?
- if (HasPadding(STy->getElementType(i), TD))
- return true;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Ty = ATy->getElementType();
+ return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ }
- // Check to see if there is any padding between this element and the
- // previous one.
- if (i) {
- unsigned PrevFieldEnd =
+ // SROA currently handles only Arrays and Structs.
+ const StructType *STy = cast<StructType>(Ty);
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned PrevFieldBitOffset = 0;
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
+
+ // Check to see if there is any padding between this element and the
+ // previous one.
+ if (i) {
+ unsigned PrevFieldEnd =
PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
- if (PrevFieldEnd < FieldBitOffset)
- return true;
- }
-
- PrevFieldBitOffset = FieldBitOffset;
- }
-
- // Check for tail padding.
- if (unsigned EltCount = STy->getNumElements()) {
- unsigned PrevFieldEnd = PrevFieldBitOffset +
- TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
- if (PrevFieldEnd < SL->getSizeInBits())
+ if (PrevFieldEnd < FieldBitOffset)
return true;
}
+ PrevFieldBitOffset = FieldBitOffset;
}
-
- return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ // Check for tail padding.
+ if (unsigned EltCount = STy->getNumElements()) {
+ unsigned PrevFieldEnd = PrevFieldBitOffset +
+ TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ if (PrevFieldEnd < SL->getSizeInBits())
+ return true;
+ }
+ return false;
}
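For example, on a typical target where int is 4 bytes with 4-byte alignment:

    struct Padded   { char c; int i; };  // 3 bytes of padding after 'c':
                                         // HasPadding returns true, so a
                                         // memcpy'd alloca of this type is
                                         // left alone.
    struct Unpadded { int a; int b; };   // contiguous fields, no tail padding:
                                         // HasPadding returns false.
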
/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
@@ -1731,14 +2192,14 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
- AllocaInfo Info;
-
- isSafeForScalarRepl(AI, AI, 0, Info);
+ AllocaInfo Info(AI);
+
+ isSafeForScalarRepl(AI, 0, Info);
if (Info.isUnsafe) {
DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
return false;
}
-
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
@@ -1748,6 +2209,20 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
HasPadding(AI->getAllocatedType(), *TD))
return false;
+ // If the alloca never has an access to just *part* of it, but is accessed
+ // via loads and stores, then we should use ConvertToScalarInfo to promote
+  // the alloca instead of promoting one piece at a time and inserting fission
+ // and fusion code.
+ if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
+ // If the struct/array just has one element, use basic SRoA.
+ if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
+ if (ST->getNumElements() > 1) return false;
+ } else {
+ if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
+ return false;
+ }
+ }
+
return true;
}
@@ -1760,7 +2235,7 @@ static bool PointsToConstantGlobal(Value *V) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return GV->isConstant();
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
+ if (CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::GetElementPtr)
return PointsToConstantGlobal(CE->getOperand(0));
return false;
@@ -1771,18 +2246,19 @@ static bool PointsToConstantGlobal(Value *V) {
/// see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
bool isOffset) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
User *U = cast<Instruction>(*UI);
- if (LoadInst *LI = dyn_cast<LoadInst>(U))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Ignore non-volatile loads, they are always ok.
- if (!LI->isVolatile())
- continue;
-
+ if (LI->isVolatile()) return false;
+ continue;
+ }
+
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
@@ -1797,27 +2273,52 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
return false;
continue;
}
-
+
+ if (CallSite CS = U) {
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load and we can ignore it.
+ if (CS.onlyReadsMemory())
+ continue;
+
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ continue;
+ }
+
     // If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
if (MI == 0)
return false;
+    // If the transfer is using the alloca as its source, then ignore it,
+    // since it is only a read of the data (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
// If we already have seen a copy, reject the second one.
if (TheCopy) return false;
-
+
// If the pointer has been offset from the start of the alloca, we can't
// safely handle this.
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
if (UI.getOperandNo() != 0) return false;
-
+
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
return false;
-
+
// Otherwise, the transform is safe. Remember the copy instruction.
TheCopy = MI;
}
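A minimal source-level case this analysis is meant to catch (hypothetical code; front ends commonly emit such copies for constant aggregate initializers):

    #include <cstring>
    static const int table[4] = { 1, 2, 3, 4 };   // constant global
    int pick(int i) {
      int local[4];                               // the alloca
      std::memcpy(local, table, sizeof(local));   // its only store is a copy
      return local[i & 3];                        // from the constant global,
    }                                             // and every other use is a
                                                  // read, so 'local' can be
                                                  // replaced by 'table'.
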
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 360749c..ce5dd73 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,7 +42,9 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
};
@@ -50,7 +52,7 @@ namespace {
char CFGSimplifyPass::ID = 0;
INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
- "Simplify the CFG", false, false);
+ "Simplify the CFG", false, false)
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 3ec70ec..70ff32e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,9 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {
+ initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F);
@@ -47,7 +49,7 @@ namespace {
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
- "Simplify half_powr library calls", false, false);
+ "Simplify half_powr library calls", false, false)
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
@@ -95,7 +97,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
InlineFunctionInfo IFI(0, TD);
bool B = InlineFunction(Call, IFI);
- assert(B && "half_powr didn't inline?"); B=B;
+ assert(B && "half_powr didn't inline?");
+ (void)B;
BasicBlock *NewBody = NewBlock->getSinglePredecessor();
assert(NewBody);
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d7ce53f..ec45b71 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -123,7 +123,7 @@ struct StrCatOpt : public LibCallOptimization {
// Verify the "strcat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
return 0;
@@ -160,9 +160,8 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- EmitMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len+1),
- 1, false, B, TD);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
}
};
@@ -174,7 +173,7 @@ struct StrNCatOpt : public StrCatOpt {
// Verify the "strncat" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!FT->getParamType(2)->isIntegerTy())
@@ -222,8 +221,9 @@ struct StrChrOpt : public LibCallOptimization {
// Verify the "strchr" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
- FT->getParamType(0) != FT->getReturnType())
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
return 0;
Value *SrcStr = CI->getArgOperand(0);
@@ -252,22 +252,55 @@ struct StrChrOpt : public LibCallOptimization {
// strchr can find the nul character.
Str += '\0';
- char CharValue = CharC->getSExtValue();
// Compute the offset.
- uint64_t i = 0;
- while (1) {
- if (i == Str.size()) // Didn't find the char. strchr returns null.
- return Constant::getNullValue(CI->getType());
- // Did we find our match?
- if (Str[i] == CharValue)
- break;
- ++i;
- }
+ size_t I = Str.find(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
- Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
- return B.CreateGEP(SrcStr, Idx, "strchr");
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+ }
+};
+
+//===---------------------------------------===//
+// 'strrchr' Optimizations
+
+struct StrRChrOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strrchr" function prototype.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != B.getInt8PtrTy() ||
+ FT->getParamType(0) != FT->getReturnType() ||
+ !FT->getParamType(1)->isIntegerTy(32))
+ return 0;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+ // Cannot fold anything if we're not looking for a constant.
+ if (!CharC)
+ return 0;
+
+ std::string Str;
+ if (!GetConstantStringInfo(SrcStr, Str)) {
+ // strrchr(s, 0) -> strchr(s, 0)
+ if (TD && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TD);
+ return 0;
+ }
+
+ // strrchr can find the nul character.
+ Str += '\0';
+
+ // Compute the offset.
+ size_t I = Str.rfind(CharC->getSExtValue());
+ if (I == std::string::npos) // Didn't find the char. Return null.
+ return Constant::getNullValue(CI->getType());
+
+ // strrchr(s+n,c) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
}
};
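Concretely, with a constant string the call folds to a pointer into that string (a sketch of the cases handled above):

    #include <cstring>
    const char *r1 = strrchr("hello", 'l');   // folded to "hello" + 3
    const char *r2 = strrchr("hello", 'z');   // folded to a null pointer
    // and strrchr(s, '\0') with a non-constant s is turned into strchr(s, '\0').
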
@@ -281,7 +314,7 @@ struct StrCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
@@ -329,7 +362,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 ||
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -384,7 +417,7 @@ struct StrCpyOpt : public LibCallOptimization {
if (FT->getNumParams() != NumParams ||
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context))
+ FT->getParamType(0) != B.getInt8PtrTy())
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -405,9 +438,8 @@ struct StrCpyOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
CI->getArgOperand(2), B, TD);
else
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
};
@@ -420,7 +452,7 @@ struct StrNCpyOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
return 0;
@@ -435,8 +467,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y, 1)
- EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'),
- LenOp, false, B, TD);
+ B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);
return Dst;
}
@@ -455,9 +486,8 @@ struct StrNCpyOpt : public LibCallOptimization {
if (Len > SrcLen+1) return 0;
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- 1, false, B, TD);
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
}
@@ -470,7 +500,7 @@ struct StrLenOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
- FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
return 0;
@@ -488,6 +518,45 @@ struct StrLenOpt : public LibCallOptimization {
}
};
+
+//===---------------------------------------===//
+// 'strpbrk' Optimizations
+
+struct StrPBrkOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ FT->getReturnType() != FT->getParamType(0))
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strpbrk(s, "") -> NULL
+ // strpbrk("", s) -> NULL
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2) {
+ size_t I = S1.find_first_of(S2);
+ if (I == std::string::npos) // No match.
+ return Constant::getNullValue(CI->getType());
+
+ return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+ }
+
+ // strpbrk(s, "a") -> strchr(s, 'a')
+ if (TD && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+
+ return 0;
+ }
+};
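A sketch of the folds implemented above for constant arguments:

    #include <cstring>
    const char *p1 = strpbrk("hello", "lo");  // folded to "hello" + 2
    const char *p2 = strpbrk("hello", "");    // folded to a null pointer
    // and strpbrk(s, "a") with a non-constant s becomes strchr(s, 'a').
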
+
//===---------------------------------------===//
// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc.
@@ -501,7 +570,8 @@ struct StrToOpt : public LibCallOptimization {
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
- CI->setOnlyReadsMemory();
+ // With a null EndPtr, this function won't capture the main argument.
+ // It would be readonly too, except that it still may write to errno.
CI->addAttribute(1, Attribute::NoCapture);
}
@@ -510,6 +580,67 @@ struct StrToOpt : public LibCallOptimization {
};
//===---------------------------------------===//
+// 'strspn' Optimizations
+
+struct StrSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strspn(s, "") -> 0
+ // strspn("", s) -> 0
+ if ((HasS1 && S1.empty()) || (HasS2 && S2.empty()))
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+
+ return 0;
+ }
+};
+
+//===---------------------------------------===//
+// 'strcspn' Optimizations
+
+struct StrCSpnOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getParamType(0) != B.getInt8PtrTy() ||
+ FT->getParamType(1) != FT->getParamType(0) ||
+ !FT->getReturnType()->isIntegerTy())
+ return 0;
+
+ std::string S1, S2;
+ bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+
+ // strcspn("", s) -> 0
+ if (HasS1 && S1.empty())
+ return Constant::getNullValue(CI->getType());
+
+ // Constant folding.
+ if (HasS1 && HasS2)
+ return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+
+ // strcspn(s, "") -> strlen(s)
+ if (TD && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, TD);
+
+ return 0;
+ }
+};
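The corresponding constant folds, worked out on small examples:

    #include <cstring>
    size_t a = strspn("abcde", "abc");    // folded to 3 (length of leading run)
    size_t b = strcspn("abcde", "xyz");   // folded to 5 (no match, full length)
    // and strcspn(s, "") with a non-constant s becomes strlen(s).
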
+
+//===---------------------------------------===//
// 'strstr' Optimizations
struct StrStrOpt : public LibCallOptimization {
@@ -637,8 +768,8 @@ struct MemCpyOpt : public LibCallOptimization {
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -659,8 +790,8 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -681,9 +812,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1),
- Type::getInt8Ty(*Context), false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
};
@@ -765,12 +895,10 @@ struct Exp2Opt : public LibCallOptimization {
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty(), "tmp");
}
if (LdExpArg) {
@@ -789,7 +917,7 @@ struct Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(),
- Type::getInt32Ty(*Context),NULL);
+ B.getInt32Ty(), NULL);
CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -819,7 +947,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
Value *V = Cast->getOperand(0);
V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
Callee->getAttributes());
- return B.CreateFPExt(V, Type::getDoubleTy(*Context));
+ return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -846,8 +974,8 @@ struct FFSOpt : public LibCallOptimization {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
if (CI->getValue() == 0) // ffs(0) -> 0.
return Constant::getNullValue(CI->getType());
- return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
- CI->getValue().countTrailingZeros()+1);
+ // ffs(c) -> cttz(c)+1
+ return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
@@ -856,11 +984,10 @@ struct FFSOpt : public LibCallOptimization {
Intrinsic::cttz, &ArgType, 1);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
- V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+ V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
- return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ return B.CreateSelect(Cond, V, B.getInt32(0));
}
};
@@ -877,10 +1004,8 @@ struct IsDigitOpt : public LibCallOptimization {
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
- Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
- "isdigittmp");
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
- "isdigit");
+ Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp");
+ Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit");
return B.CreateZExt(Op, CI->getType());
}
};
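The unsigned-compare trick above, worked out on a few characters (ASCII values shown):

    // isdigit(c)  ->  (unsigned)(c - '0') < 10
    //   c = '7' (55):  55 - 48 =  7   -> in range, result 1
    //   c = 'a' (97):  97 - 48 = 49   -> out of range, result 0
    //   c = '/' (47):  47 - 48 wraps to a huge unsigned value, result 0
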
@@ -898,8 +1023,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
- Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
- "isascii");
+ Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii");
return B.CreateZExt(Op, CI->getType());
}
};
@@ -917,8 +1041,7 @@ struct AbsOpt : public LibCallOptimization {
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
- Value *Pos = B.CreateICmpSGT(Op,
- Constant::getAllOnesValue(Op->getType()),
+ Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()),
"ispos");
Value *Neg = B.CreateNeg(Op, "neg");
return B.CreateSelect(Pos, Op, Neg);
@@ -969,11 +1092,15 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
- // in case there is an error writing to stdout.
+ // Do not do any of the following transformations if the printf return value
+    // is used; in general the printf return value is not compatible with either
+ // putchar() or puts().
+ if (!CI->use_empty())
+ return 0;
+
+ // printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
- FormatStr[0]), B, TD);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1004,8 +1131,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
- CI->getArgOperand(1)->getType()->isPointerTy() &&
- CI->use_empty()) {
+ CI->getArgOperand(1)->getType()->isPointerTy()) {
EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
@@ -1042,9 +1168,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
- ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
- FormatStr.size() + 1), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1058,13 +1184,11 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getArgOperand(2),
- Type::getInt8Ty(*Context), "char");
+ Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
- Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
- "nul");
- B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+ Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+ B.CreateStore(B.getInt8(0), Ptr);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1080,8 +1204,7 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
- IncLen, 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1208,6 +1331,34 @@ struct FPrintFOpt : public LibCallOptimization {
}
};
+//===---------------------------------------===//
+// 'puts' Optimizations
+
+struct PutsOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Require one fixed pointer argument and an integer/void result.
+ const FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
+ FT->getReturnType()->isVoidTy()))
+ return 0;
+
+ // Check for a constant string.
+ std::string Str;
+ if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ return 0;
+
+ if (Str.empty() && CI->use_empty()) {
+ // puts("") -> putchar('\n')
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
+ }
+
+ return 0;
+ }
+};
+
} // end anonymous namespace.
//===----------------------------------------------------------------------===//
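
The new PutsOpt handler above only fires when the argument is a constant empty string and the call's result is unused, matching the TODO entry that is removed from the comment block further down. Roughly, in source terms (sketch only):

    #include <cstdio>

    void blank_line() {
      puts("");      // --> putchar('\n'); an empty puts() only emits the newline
    }

    // Not rewritten by this handler:
    //   int r = puts("");   // result is used
    //   puts("hello");      // non-empty constant string
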
@@ -1220,10 +1371,10 @@ namespace {
class SimplifyLibCalls : public FunctionPass {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
- StrNCpyOpt StrNCpy; StrLenOpt StrLen;
- StrToOpt StrTo; StrStrOpt StrStr;
+ StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
+ StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk;
+ StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
@@ -1233,11 +1384,14 @@ namespace {
// Formatting and IO Optimizations
SPrintFOpt SPrintF; PrintFOpt PrintF;
FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
+ PutsOpt Puts;
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
+ initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
+ }
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1255,7 +1409,7 @@ namespace {
} // end anonymous namespace.
INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
- "Simplify well-known library calls", false, false);
+ "Simplify well-known library calls", false, false)
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -1269,11 +1423,13 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
Optimizations["strchr"] = &StrChr;
+ Optimizations["strrchr"] = &StrRChr;
Optimizations["strcmp"] = &StrCmp;
Optimizations["strncmp"] = &StrNCmp;
Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["strlen"] = &StrLen;
+ Optimizations["strpbrk"] = &StrPBrk;
Optimizations["strtol"] = &StrTo;
Optimizations["strtod"] = &StrTo;
Optimizations["strtof"] = &StrTo;
@@ -1281,6 +1437,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strtoll"] = &StrTo;
Optimizations["strtold"] = &StrTo;
Optimizations["strtoull"] = &StrTo;
+ Optimizations["strspn"] = &StrSpn;
+ Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
Optimizations["memcpy"] = &MemCpy;
@@ -1341,6 +1499,7 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["fwrite"] = &FWrite;
Optimizations["fputs"] = &FPuts;
Optimizations["fprintf"] = &FPrintF;
+ Optimizations["puts"] = &Puts;
}
@@ -2155,9 +2314,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
-// puts:
-// * puts("") -> putchar('\n')
-//
// round, roundf, roundl:
// * round(cnst) -> cnst'
//
@@ -2173,24 +2329,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// stpcpy:
// * stpcpy(str, "literal") ->
// llvm.memcpy(str,"literal",strlen("literal")+1,1)
-// strrchr:
-// * strrchr(s,c) -> reverse_offset_of_in(c,s)
-// (if c is a constant integer and s is a constant string)
-// * strrchr(s1,0) -> strchr(s1,0)
-//
-// strpbrk:
-// * strpbrk(s,a) -> offset_in_for(s,a)
-// (if s and a are both constant strings)
-// * strpbrk(s,"") -> 0
-// * strpbrk(s,a) -> strchr(s,a[0]) (if a is constant string of length 1)
-//
-// strspn, strcspn:
-// * strspn(s,a) -> const_int (if both args are constant)
-// * strspn("",a) -> 0
-// * strspn(s,"") -> 0
-// * strcspn(s,a) -> const_int (if both args are constant)
-// * strcspn("",a) -> 0
-// * strcspn(s,"") -> strlen(a)
//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index 95d3ded..705f442 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,9 @@ namespace {
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(ID) {}
+ Sinking() : FunctionPass(ID) {
+ initializeSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -56,7 +58,11 @@ namespace {
} // end anonymous namespace
char Sinking::ID = 0;
-INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
+INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
@@ -150,11 +156,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
if (L->isVolatile()) return false;
- Value *Ptr = L->getPointerOperand();
- unsigned Size = AA->getTypeStoreSize(L->getType());
+ AliasAnalysis::Location Loc = AA->getLocation(L);
for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
E = Stores.end(); I != E; ++I)
- if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+ if (AA->getModRefInfo(*I, Loc) & AliasAnalysis::Mod)
return false;
}
@@ -163,7 +168,10 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
return false;
}
- return Inst->isSafeToSpeculativelyExecute();
+ if (isa<TerminatorInst>(Inst) || isa<PHINode>(Inst))
+ return false;
+
+ return true;
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
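
The Sink.cpp hunks above drop the blanket isSafeToSpeculativelyExecute() requirement: loads are now checked against the recorded stores through an AliasAnalysis::Location query, and only terminators and PHI nodes are refused outright. A small C-level illustration of the kind of load sinking this permits (hypothetical function; the decision is of course made on the IR):

    int maybe_use(int *p, int flag) {
      int v = *p;        // no aliasing store follows in this block, so the load
      if (flag)          // can be sunk into the block that actually uses it...
        return 0;
      return v;          // ...and is no longer executed on the flag != 0 path
    }
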
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
index 2e437ac..9dd83c0 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailDuplication.cpp
@@ -26,14 +26,14 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <map>
using namespace llvm;
@@ -49,7 +49,9 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(ID) {}
+ TailDup() : FunctionPass(ID) {
+ initializeTailDupPass(*PassRegistry::getPassRegistry());
+ }
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +61,7 @@ namespace {
}
char TailDup::ID = 0;
-INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false)
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
@@ -360,8 +362,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
+ else if (Value *V = SimplifyInstruction(Inst)) {
+ Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
}
}
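
In the TailDuplication cleanup above, SimplifyInstruction subsumes ConstantFoldInstruction and additionally handles instructions whose operands are not all constants. A brief illustration of the difference (pseudo-IR in comments; value names invented):

    // After duplicating a tail, the cloned block may contain instructions like
    //   %a = add i32 %x, 0
    //   %b = and i32 %y, -1
    // ConstantFoldInstruction cannot touch these because %x and %y are not
    // constants, but SimplifyInstruction folds %a to %x and %b to %y, so the
    // duplicated instructions are replaced and erased instead of surviving as
    // redundant copies.
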
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 3717254..5b6bc04 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,31 +52,52 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumEliminated, "Number of tail calls removed");
+STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(ID) {}
+ TailCallElim() : FunctionPass(ID) {
+ initializeTailCallElimPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
private:
+ CallInst *FindTRECandidate(Instruction *I,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
+ bool FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail);
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
SmallVector<PHINode*, 8> &ArgumentPHIs,
@@ -88,7 +109,7 @@ namespace {
char TailCallElim::ID = 0;
INITIALIZE_PASS(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false);
+ "Tail Call Elimination", false, false)
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -133,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
-
bool FunctionContainsEscapingAllocas = false;
// CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
@@ -160,10 +180,17 @@ bool TailCallElim::runOnFunction(Function &F) {
return false;
// Second pass, change any tail calls to loops.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
- MadeChange |= ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,CannotTCETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTCETailMarkedCall);
+ MadeChange |= Change;
+ }
+ }
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
@@ -175,7 +202,7 @@ bool TailCallElim::runOnFunction(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = PN->hasConstantValue()) {
+ if (Value *PNV = SimplifyInstruction(PN)) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -322,41 +349,47 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
}
-bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
- bool &TailCallsAreMarkedTail,
- SmallVector<PHINode*, 8> &ArgumentPHIs,
- bool CannotTailCallElimCallsMarkedTail) {
- BasicBlock *BB = Ret->getParent();
+static Instruction *FirstNonDbg(BasicBlock::iterator I) {
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ return &*I;
+}
+
+CallInst*
+TailCallElim::FindTRECandidate(Instruction *TI,
+ bool CannotTailCallElimCallsMarkedTail) {
+ BasicBlock *BB = TI->getParent();
Function *F = BB->getParent();
- if (&BB->front() == Ret) // Make sure there is something before the ret...
- return false;
+ if (&BB->front() == TI) // Make sure there is something before the terminator.
+ return 0;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI;
- BasicBlock::iterator BBI = Ret;
- while (1) {
+ CallInst *CI = 0;
+ BasicBlock::iterator BBI = TI;
+ while (true) {
CI = dyn_cast<CallInst>(BBI);
if (CI && CI->getCalledFunction() == F)
break;
if (BBI == BB->begin())
- return false; // Didn't find a potential tail call.
+ return 0; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return false;
+ return 0;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
// and disable this xform in this case, because the code generator will
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
- &BB->front() == CI && &*++BB->begin() == Ret &&
+ FirstNonDbg(BB->front()) == CI &&
+ FirstNonDbg(llvm::next(BB->begin())) == TI &&
callIsSmall(F)) {
// A single-block function with just a call and a return. Check that
// the arguments match.
@@ -367,9 +400,17 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return false;
+ return 0;
}
+ return CI;
+}
+
+bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
+ BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
// If we are introducing accumulator recursion to eliminate operations after
// the call instruction that are both associative and commutative, the initial
// value for the accumulator is placed in this variable. If this value is set
@@ -387,7 +428,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ BasicBlock::iterator BBI = CI;
+ for (++BBI; &*BBI != Ret; ++BBI) {
if (CanMoveAboveCall(BBI, CI)) continue;
// If we can't move the instruction above the call, it might be because it
@@ -424,6 +466,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
return false;
}
+ BasicBlock *BB = Ret->getParent();
+ Function *F = BB->getParent();
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
if (OldEntry == 0) {
@@ -533,3 +578,53 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
++NumEliminated;
return true;
}
+
+bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
+ ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ bool Change = false;
+
+ // If the return block contains nothing but the return and PHI's,
+ // there might be an opportunity to duplicate the return in its
+ // predecessors and perform TRC there. Look for predecessors that end
+ // in unconditional branch and recursive call(s).
+ SmallVector<BranchInst*, 8> UncondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PTI = Pred->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(BI);
+ }
+
+ while (!UncondBranchPreds.empty()) {
+ BranchInst *BI = UncondBranchPreds.pop_back_val();
+ BasicBlock *Pred = BI->getParent();
+ if (CallInst *CI = FindTRECandidate(BI, CannotTailCallElimCallsMarkedTail)){
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ EliminateRecursiveTailCall(CI, FoldReturnIntoUncondBranch(Ret, BB, Pred),
+ OldEntry, TailCallsAreMarkedTail, ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+ ++NumRetDuped;
+ Change = true;
+ }
+ }
+
+ return Change;
+}
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+ bool &TailCallsAreMarkedTail,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
+ bool CannotTailCallElimCallsMarkedTail) {
+ CallInst *CI = FindTRECandidate(Ret, CannotTailCallElimCallsMarkedTail);
+ if (!CI)
+ return false;
+
+ return EliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs,
+ CannotTailCallElimCallsMarkedTail);
+}
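
The restructuring above (FindTRECandidate, EliminateRecursiveTailCall, and the new FoldReturnAndProcessPred driver) lets tailcallelim handle recursive calls that only reach the return through an unconditional branch into a shared return block. A hedged C-level sketch of code this now covers (the exact IR shape depends on the front end):

    // Both arms branch to a common block that merges the result in a PHI and
    // returns it.  ProcessReturningBlock alone does not see the recursive call,
    // because it is not in the returning block; FoldReturnAndProcessPred
    // duplicates the return into that predecessor (FoldReturnIntoUncondBranch),
    // after which EliminateRecursiveTailCall can turn the recursion into a loop.
    int sum_to(int n, int acc) {
      int r;
      if (n == 0)
        r = acc;
      else
        r = sum_to(n - 1, acc + n);
      return r;
    }
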
diff --git a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
index 4d64c85..be7bed1 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CallSite.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -379,27 +380,10 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetLowering &TLI) {
- std::vector<InlineAsm::ConstraintInfo>
- Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 0; // The argument of the CallInst.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
- // Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
- }
-
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, SDValue());
@@ -584,7 +568,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
MemoryInst, Result);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.MatchAddr(Address, 0);
- Success = Success; assert(Success && "Couldn't select *anything*?");
+ (void)Success; assert(Success && "Couldn't select *anything*?");
// If the match didn't cover I, then it won't be shared by it.
if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
diff --git a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 093083a..acaea19 100644
--- a/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -19,8 +19,9 @@
#include "llvm/Constant.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Scalar.h"
@@ -63,12 +64,27 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// any single-entry PHI nodes in it, fold them away. This handles the case
/// when all entries to the PHI nodes in a block are guaranteed equal, such as
/// when the block has exactly one predecessor.
-void llvm::FoldSingleEntryPHINodes(BasicBlock *BB) {
+void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
+ if (!isa<PHINode>(BB->begin())) return;
+
+ AliasAnalysis *AA = 0;
+ MemoryDependenceAnalysis *MemDep = 0;
+ if (P) {
+ AA = P->getAnalysisIfAvailable<AliasAnalysis>();
+ MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
+ }
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
else
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+ if (MemDep)
+ MemDep->removeInstruction(PN); // Memdep updates AA itself.
+ else if (AA && isa<PointerType>(PN->getType()))
+ AA->deleteValue(PN);
+
PN->eraseFromParent();
}
}
@@ -110,7 +126,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (isa<InvokeInst>(PredBB->getTerminator())) return false;
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
- BasicBlock* OnlySucc = BB;
+ BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
if (*SI != OnlySucc) {
OnlySucc = 0; // There are multiple distinct successors!
@@ -131,10 +147,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
}
// Begin by getting rid of unneeded PHIs.
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- BB->getInstList().pop_front(); // Delete the phi node...
- }
+ if (isa<PHINode>(BB->front()))
+ FoldSingleEntryPHINodes(BB, P);
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
@@ -152,24 +166,27 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Finally, erase the old block and update dominator info.
if (P) {
- if (DominatorTree* DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- DomTreeNode* DTN = DT->getNode(BB);
- DomTreeNode* PredDTN = DT->getNode(PredBB);
-
- if (DTN) {
- SmallPtrSet<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
- for (SmallPtrSet<DomTreeNode*, 8>::iterator DI = Children.begin(),
+ if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DomTreeNode *DTN = DT->getNode(BB)) {
+ DomTreeNode *PredDTN = DT->getNode(PredBB);
+ SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
+ for (SmallVector<DomTreeNode*, 8>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
DT->changeImmediateDominator(*DI, PredDTN);
DT->eraseNode(BB);
}
+
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
+ LI->removeBlock(BB);
+
+ if (MemoryDependenceAnalysis *MD =
+ P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
+ MD->invalidateCachedPredecessors();
}
}
BB->eraseFromParent();
-
-
return true;
}
@@ -218,52 +235,6 @@ void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
}
-/// RemoveSuccessor - Change the specified terminator instruction such that its
-/// successor SuccNum no longer exists. Because this reduces the outgoing
-/// degree of the current basic block, the actual terminator instruction itself
-/// may have to be changed. In the case where the last successor of the block
-/// is deleted, a return instruction is inserted in its place which can cause a
-/// surprising change in program behavior if it is not expected.
-///
-void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
- assert(SuccNum < TI->getNumSuccessors() &&
- "Trying to remove a nonexistant successor!");
-
- // If our old successor block contains any PHI nodes, remove the entry in the
- // PHI nodes that comes from this branch...
- //
- BasicBlock *BB = TI->getParent();
- TI->getSuccessor(SuccNum)->removePredecessor(BB);
-
- TerminatorInst *NewTI = 0;
- switch (TI->getOpcode()) {
- case Instruction::Br:
- // If this is a conditional branch... convert to unconditional branch.
- if (TI->getNumSuccessors() == 2) {
- cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
- } else { // Otherwise convert to a return instruction...
- Value *RetVal = 0;
-
- // Create a value to return... if the function doesn't return null...
- if (!BB->getParent()->getReturnType()->isVoidTy())
- RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
-
- // Create the return...
- NewTI = ReturnInst::Create(TI->getContext(), RetVal);
- }
- break;
-
- case Instruction::Invoke: // Should convert to call
- case Instruction::Switch: // Should remove entry
- default:
- case Instruction::Ret: // Cannot happen, has no successors!
- llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
- }
-
- if (NewTI) // If it's a different instruction, replace.
- ReplaceInstWithInst(TI, NewTI);
-}
-
/// GetSuccessorNumber - Search for the specified successor of basic block BB
/// and return its position in the terminator instruction's list of
/// successors. It is an error to call this with a block that is not a
@@ -300,13 +271,13 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
assert(SP == BB && "CFG broken");
SP = NULL;
return SplitBlock(Succ, Succ->begin(), P);
- } else {
- // Otherwise, if BB has a single successor, split it at the bottom of the
- // block.
- assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
- return SplitBlock(BB, BB->getTerminator(), P);
}
+
+ // Otherwise, if BB has a single successor, split it at the bottom of the
+ // block.
+ assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+ "Should have a single succ!");
+ return SplitBlock(BB, BB->getTerminator(), P);
}
/// SplitBlock - Split the specified block at the specified instruction - every
@@ -322,12 +293,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
// The new block lives in whichever loop the old one did. This preserves
// LCSSA as well, because we force the split point to be after any PHI nodes.
- if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())
+ if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- // Old dominates New. New node domiantes all other nodes dominated by Old.
+ // Old dominates New. New node dominates all other nodes dominated by Old.
DomTreeNode *OldNode = DT->getNode(Old);
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
@@ -340,9 +311,6 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
DT->changeImmediateDominator(*I, NewNode);
}
- if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>())
- DF->splitBlock(Old);
-
return New;
}
@@ -354,10 +322,9 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
/// suffix of 'Suffix'.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
-/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's
-/// complicated to handle the case where one of the edges being split
-/// is an exit of a loop with other exits).
+/// LoopInfo, and LCSSA but no other analyses. In particular, it does not

+/// preserve LoopSimplify (because it's complicated to handle the case where one
+/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BasicBlock *const *Preds,
@@ -407,13 +374,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
}
}
- // Update dominator tree and dominator frontier if available.
+ // Update dominator tree if available.
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
- if (DominanceFrontier *DF =
- P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
- DF->splitBlock(NewBB);
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
// node becomes an incoming value for BB's phi node. However, if the Preds
@@ -545,7 +509,32 @@ void llvm::FindFunctionBackedges(const Function &F,
// Go up one level.
InStack.erase(VisitStack.pop_back_val().first);
}
- } while (!VisitStack.empty());
-
-
+ } while (!VisitStack.empty());
+}
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+ BasicBlock *Pred) {
+ Instruction *UncondBranch = Pred->getTerminator();
+ // Clone the return and add it to the end of the predecessor.
+ Instruction *NewRet = RI->clone();
+ Pred->getInstList().push_back(NewRet);
+
+ // If the return instruction returns a value, and if the value was a
+ // PHI node in "BB", propagate the right value into the return.
+ for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
+ i != e; ++i)
+ if (PHINode *PN = dyn_cast<PHINode>(*i))
+ if (PN->getParent() == BB)
+ *i = PN->getIncomingValueForBlock(Pred);
+
+ // Update any PHI nodes in the returning block to realize that we no
+ // longer branch to them.
+ BB->removePredecessor(Pred);
+ UncondBranch->eraseFromParent();
+ return cast<ReturnInst>(NewRet);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index f75ffe6..616b066 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -11,8 +11,7 @@
// inserting a dummy basic block. This pass may be "required" by passes that
// cannot deal with critical edges. For this usage, the structure type is
// forward declared. This pass obviously invalidates the CFG, but can update
-// forward dominator (set, immediate dominators, tree, and frontier)
-// information.
+// dominator trees.
//
//===----------------------------------------------------------------------===//
@@ -36,13 +35,14 @@ STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(ID) {}
+ BreakCriticalEdges() : FunctionPass(ID) {
+ initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<ProfileInfo>();
@@ -54,7 +54,7 @@ namespace {
char BreakCriticalEdges::ID = 0;
INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
- "Break critical edges in CFG", false, false);
+ "Break critical edges in CFG", false, false)
// Publically exposed interface to pass...
char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
@@ -150,10 +150,9 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds,
}
/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
-/// split the critical edge. This will update DominatorTree and
-/// DominatorFrontier information if it is available, thus calling this pass
-/// will not invalidate either of them. This returns the new block if the edge
-/// was split, null otherwise.
+/// split the critical edge. This will update DominatorTree information if it
+/// is available, thus calling this pass will not invalidate it.
+/// This returns the new block if the edge was split, null otherwise.
///
/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
/// specified successor will be merged into the same critical edge block.
@@ -255,12 +254,11 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (P == 0) return NewBB;
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && DF == 0 && LI == 0 && PI == 0)
+ if (DT == 0 && LI == 0 && PI == 0)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -281,7 +279,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
I != E; ++I) {
BasicBlock *P = *I;
if (P != NewBB)
- OtherPreds.push_back(P);
+ OtherPreds.push_back(P);
}
}
@@ -318,40 +316,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
}
- // Should we update DominanceFrontier information?
- if (DF) {
- // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
- if (!OtherPreds.empty()) {
- // FIXME: IMPLEMENT THIS!
- llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset."
- " not implemented yet!");
- }
-
- // Since the new block is dominated by its only predecessor TIBB,
- // it cannot be in any block's dominance frontier. If NewBB dominates
- // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
- // just {DestBB}.
- DominanceFrontier::DomSetType NewDFSet;
- if (NewBBDominatesDestBB) {
- DominanceFrontier::iterator I = DF->find(DestBB);
- if (I != DF->end()) {
- DF->addBasicBlock(NewBB, I->second);
-
- if (I->second.count(DestBB)) {
- // However NewBB's frontier does not include DestBB.
- DominanceFrontier::iterator NF = DF->find(NewBB);
- DF->removeFromFrontier(NF, DestBB);
- }
- }
- else
- DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
- } else {
- DominanceFrontier::DomSetType NewDFSet;
- NewDFSet.insert(DestBB);
- DF->addBasicBlock(NewBB, NewDFSet);
- }
- }
-
// Update LoopInfo if it is around.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index c313949..4a90751 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -131,21 +131,6 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
return CI;
}
-
-/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
-/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
- bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
- Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
- return B.CreateCall5(MemCpy, Dst, Src, Len,
- ConstantInt::get(B.getInt32Ty(), Align),
- ConstantInt::get(B.getInt1Ty(), isVolatile));
-}
-
/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
@@ -170,22 +155,6 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return CI;
}
-/// EmitMemMove - Emit a call to the memmove function to the builder. This
-/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
-Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align,
- bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- const Type *ArgTys[3] = { Dst->getType(), Src->getType(),
- TD->getIntPtrType(Context) };
- Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3);
- Dst = CastToCStr(Dst, B);
- Src = CastToCStr(Src, B);
- Value *A = ConstantInt::get(B.getInt32Ty(), Align);
- Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
- return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol);
-}
-
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
@@ -233,18 +202,6 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return CI;
}
-/// EmitMemSet - Emit a call to the memset function
-Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile,
- IRBuilder<> &B, const TargetData *TD) {
- Module *M = B.GetInsertBlock()->getParent()->getParent();
- Intrinsic::ID IID = Intrinsic::memset;
- const Type *Tys[2] = { Dst->getType(), Len->getType() };
- Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2);
- Value *Align = ConstantInt::get(B.getInt32Ty(), 1);
- Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile);
- return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol);
-}
-
/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
/// 'floor'). This function is known to take a single of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
@@ -422,8 +379,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
return false;
if (isFoldable(3, 2, false)) {
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -445,8 +402,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
return false;
if (isFoldable(3, 2, false)) {
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), 1, false, B, TD);
+ B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -465,8 +422,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
- false, B, TD);
+ B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
replaceCall(CI->getArgOperand(0));
return true;
}
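
The removed EmitMemCpy/EmitMemMove/EmitMemSet helpers are superseded by the IRBuilder methods used at the call sites above. A minimal sketch of the replacement pattern (assuming an IRBuilder<> already positioned at the insertion point; the helper name is invented):

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Emit an align-1 copy of Len bytes, the same thing the deleted EmitMemCpy
    // helper used to build by hand via Intrinsic::getDeclaration.  The builder
    // method declares the memcpy intrinsic itself, so no TargetData is needed
    // for this unaligned case; CreateMemMove and CreateMemSet follow suit.
    static void emitByteCopy(IRBuilder<> &B, Value *Dst, Value *Src, Value *Len) {
      B.CreateMemCpy(Dst, Src, Len, /*Align=*/1);
    }
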
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index f43186e..d967ceb 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -112,8 +112,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
- CodeInfo);
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
VMap[&BB] = CBB; // Add basic block mapping.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
@@ -122,12 +121,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
- //
for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap, ModuleLevelChanges);
+ RemapInstruction(II, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -138,8 +137,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
/// updated to include mappings from all of the instructions and basicblocks in
/// the function from their old to new values.
///
-Function *llvm::CloneFunction(const Function *F,
- ValueToValueMapTy &VMap,
+Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
@@ -216,7 +214,7 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- Value *&BBEntry = VMap[BB];
+ TrackingVH<Value> &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -262,8 +260,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
- if (Cond == 0)
- Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
+ if (Cond == 0) {
+ Value *V = VMap[BI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
// Constant fold to uncond branch!
if (Cond) {
@@ -276,8 +276,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (Cond == 0) // Or known constant after constant prop in the callee...
- Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
+ if (Cond == 0) { // Or known constant after constant prop in the callee...
+ Value *V = VMap[SI->getCondition()];
+ Cond = dyn_cast_or_null<ConstantInt>(V);
+ }
if (Cond) { // Constant fold to uncond branch!
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
@@ -318,7 +320,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap, ModuleLevelChanges)))
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -394,7 +397,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
- BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
+ Value *V = VMap[BI];
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
if (NewBB == 0) continue; // Dead block.
// Add the new block to the new function.
@@ -455,7 +459,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
I->setDebugLoc(DebugLoc());
}
}
- RemapInstruction(I, VMap, ModuleLevelChanges);
+ RemapInstruction(I, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
}
@@ -474,10 +479,11 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
OPN = PHIToResolve[phino];
PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
- if (BasicBlock *MappedBlock =
- cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
+ Value *V = VMap[PN->getIncomingBlock(pred)];
+ if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap, ModuleLevelChanges);
+ VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
index 551b630..87dd141 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneLoop.cpp
@@ -19,15 +19,14 @@
using namespace llvm;
-/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
-/// dominance info. It is expected that basic block is already cloned.
+/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
+/// that the basic block is already cloned.
static void CloneDominatorInfo(BasicBlock *BB,
- ValueMap<const Value *, Value *> &VMap,
- DominatorTree *DT,
- DominanceFrontier *DF) {
+ ValueToValueMapTy &VMap,
+ DominatorTree *DT) {
assert (DT && "DominatorTree is not available");
- ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ ValueToValueMapTy::iterator BI = VMap.find(BB);
assert (BI != VMap.end() && "BasicBlock clone is missing");
BasicBlock *NewBB = cast<BasicBlock>(BI->second);
@@ -42,45 +41,23 @@ static void CloneDominatorInfo(BasicBlock *BB,
// NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom;
- ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
if (BBDomI != VMap.end()) {
NewBBDom = cast<BasicBlock>(BBDomI->second);
if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, VMap, DT, DF);
+ CloneDominatorInfo(BBDom, VMap, DT);
}
DT->addNewBlock(NewBB, NewBBDom);
-
- // Copy cloned dominance frontiner set
- if (DF) {
- DominanceFrontier::DomSetType NewDFSet;
- DominanceFrontier::iterator DFI = DF->find(BB);
- if ( DFI != DF->end()) {
- DominanceFrontier::DomSetType S = DFI->second;
- for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
- I != E; ++I) {
- BasicBlock *DB = *I;
- ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
- if (IDM != VMap.end())
- NewDFSet.insert(cast<BasicBlock>(IDM->second));
- else
- NewDFSet.insert(DB);
- }
- }
- DF->addBasicBlock(NewBB, NewDFSet);
- }
}
/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
/// using old blocks to new blocks mapping.
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueMap<const Value *, Value *> &VMap, Pass *P) {
+ ValueToValueMapTy &VMap, Pass *P) {
DominatorTree *DT = NULL;
- DominanceFrontier *DF = NULL;
- if (P) {
+ if (P)
DT = P->getAnalysisIfAvailable<DominatorTree>();
- DF = P->getAnalysisIfAvailable<DominanceFrontier>();
- }
SmallVector<BasicBlock *, 16> NewBlocks;
@@ -116,7 +93,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- CloneDominatorInfo(BB, VMap, DT, DF);
+ CloneDominatorInfo(BB, VMap, DT);
}
// Process sub loops
@@ -134,7 +111,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (unsigned index = 0, num_ops = Insn->getNumOperands();
index != num_ops; ++index) {
Value *Op = Insn->getOperand(index);
- ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ ValueToValueMapTy::iterator OpItr = VMap.find(Op);
if (OpItr != VMap.end())
Insn->setOperand(index, OpItr->second);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index b347bf5..1046c38 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -89,8 +89,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap,
- true)));
+ VMap, RF_None)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -121,7 +120,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
}
// And named metadata....
@@ -130,7 +129,8 @@ Module *llvm::CloneModule(const Module *M,
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
+ RF_None)));
}
return New;
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index b51f751..e633772 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -186,8 +186,8 @@ void CodeExtractor::splitReturnBlocks() {
if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) {
BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret");
if (DT) {
- // Old dominates New. New node domiantes all other nodes dominated
- //by Old.
+ // Old dominates New. New node dominates all other nodes dominated
+ // by Old.
DomTreeNode *OldNode = DT->getNode(*I);
SmallVector<DomTreeNode*, 8> Children;
for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end();
diff --git a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8e82a02..8cc2649 100644
--- a/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -129,7 +129,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
assert(II->getParent() != P->getIncomingBlock(i) &&
- "Invoke edge not supported yet"); II=II;
+ "Invoke edge not supported yet"); (void)II;
}
new StoreInst(P->getIncomingValue(i), Slot,
P->getIncomingBlock(i)->getTerminator());
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 88979e86..c1faf24 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -22,7 +22,9 @@
#include "llvm/Attributes.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CallSite.h"
@@ -170,7 +172,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
- ValueMap<const Value*, Value*> &VMap,
+ ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -193,7 +195,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
for (; I != E; ++I) {
const Value *OrigCall = I->first;
- ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
+ ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
if (VMI == VMap.end() || VMI->second == 0)
continue;
@@ -228,6 +230,90 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// we have to make the implicit memcpy explicit by adding it.
+static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+ const Function *CalledFunc,
+ InlineFunctionInfo &IFI,
+ unsigned ByValAlignment) {
+ const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+
+ // If the called function is readonly, then it could not mutate the caller's
+ // copy of the byval'd memory. In this case, it is safe to elide the copy and
+ // temporary.
+ if (CalledFunc->onlyReadsMemory()) {
+ // If the byval argument has a specified alignment that is greater than the
+ // passed in pointer, then we either have to round up the input pointer or
+ // give up on this transformation.
+ if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
+ return Arg;
+
+ // If the pointer is already known to be sufficiently aligned, or if we can
+ // round it up to a larger alignment, then we don't need a temporary.
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
+ IFI.TD) >= ByValAlignment)
+ return Arg;
+
+ // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
+ // for code quality, but rarely happens and is required for correctness.
+ }
+
+ LLVMContext &Context = Arg->getContext();
+
+ const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+
+ // Create the alloca. If we have TargetData, use nice alignment.
+ unsigned Align = 1;
+ if (IFI.TD)
+ Align = IFI.TD->getPrefTypeAlignment(AggTy);
+
+ // If the byval had an alignment specified, we *must* use at least that
+ // alignment, as it is required by the byval argument (and uses of the
+ // pointer inside the callee).
+ Align = std::max(Align, ByValAlignment);
+
+ Function *Caller = TheCall->getParent()->getParent();
+
+ Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ &*Caller->begin()->begin());
+ // Emit a memcpy.
+ const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
+ Intrinsic::memcpy,
+ Tys, 3);
+ Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
+ Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
+
+ Value *Size;
+ if (IFI.TD == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.TD->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DestCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ CallInst *TheMemCpy =
+ CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+
+ // If we have a call graph, update it.
+ if (CallGraph *CG = IFI.CG) {
+ CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
+ CallGraphNode *CallerNode = (*CG)[Caller];
+ CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
+ }
+
+ // Uses of the argument in the function should use our new alloca
+ // instead.
+ return NewAlloca;
+}
+
// InlineFunction - This function inlines the called function into the basic
// block of the caller. This returns false if it is not possible to inline this
// call. The program is still in a well defined state if this occurs though.
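
The new HandleByValArgument helper above factors the byval copy out of InlineFunction. The readonly-callee case still elides the temporary, but now only when the declared byval alignment is already satisfied or can be enforced on the incoming pointer; otherwise an aligned alloca plus an explicit memcpy is emitted, making the implicit by-value copy explicit. A source-level sketch of the two situations (types and names invented):

    struct Big { int data[64]; };

    static int  peek(struct Big b)  { return b.data[0]; }  // only reads its copy
    static void scrub(struct Big b) { b.data[0] = 0; }      // writes its copy

    // Assuming the ABI passes Big with the byval attribute and both calls are
    // inlined:
    //  - peek():  a readonly callee may use the caller's object directly, so no
    //             alloca and no memcpy are emitted (alignment permitting).
    //  - scrub(): an alloca is placed in the caller's entry block and an
    //             explicit memcpy of sizeof(struct Big) bytes fills it before
    //             the inlined body runs.
    void caller(struct Big *p) { (void)peek(*p); scrub(*p); }
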
@@ -251,7 +337,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
-
// If the call to the callee is not a tail call, we must clear the 'tail'
// flags on any calls that we inline.
bool MustClearTailCallFlags =
@@ -287,7 +372,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
Function::iterator FirstNewBlock;
{ // Scope to destroy VMap after cloning.
- ValueMap<const Value*, Value*> VMap;
+ ValueToValueMapTy VMap;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -304,58 +389,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
- !CalledFunc->onlyReadsMemory()) {
- const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
- const Type *VoidPtrTy =
- Type::getInt8PtrTy(Context);
-
- // Create the alloca. If we have TargetData, use nice alignment.
- unsigned Align = 1;
- if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy);
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
- I->getName(),
- &*Caller->begin()->begin());
- // Emit a memcpy.
- const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys, 3);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.TD == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.TD->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::get(Type::getInt1Ty(Context), 0)
- };
- CallInst *TheMemCpy =
- CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
-
- // If we have a call graph, update it.
- if (CallGraph *CG = IFI.CG) {
- CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
- CallGraphNode *CallerNode = (*CG)[Caller];
- CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
- }
-
- // Uses of the argument in the function should use our new alloca
- // instead.
- ActualArg = NewAlloca;
-
+ if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
+ CalledFunc->getParamAlignment(ArgNo+1));
+
// Calls that we inline may use the new alloca, so we need to clear
- // their 'tail' flags.
- MustClearTailCallFlags = true;
+ // their 'tail' flags if HandleByValArgument introduced a new alloca and
+ // the callee has calls.
+ MustClearTailCallFlags |= ActualArg != *AI;
}
VMap[I] = ActualArg;
@@ -399,8 +440,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
if (!isa<Constant>(AI->getArraySize()))
continue;
- // Keep track of the static allocas that we inline into the caller if the
- // StaticAllocas pointer is non-null.
+ // Keep track of the static allocas that we inline into the caller.
IFI.StaticAllocas.push_back(AI);
// Scan for the block of allocas that we can move over, and move them
@@ -579,10 +619,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// any users of the original call/invoke instruction.
const Type *RTy = CalledFunc->getReturnType();
+ PHINode *PHI = 0;
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
- PHINode *PHI = 0;
if (!TheCall->use_empty()) {
PHI = PHINode::Create(RTy, TheCall->getName(),
AfterCallBB->begin());
@@ -600,14 +640,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
"Ret value not consistent in function!");
PHI->addIncoming(RI->getReturnValue(), RI->getParent());
}
-
- // Now that we inserted the PHI, check to see if it has a single value
- // (e.g. all the entries are the same or undef). If so, remove the PHI so
- // it doesn't block other optimizations.
- if (Value *V = PHI->hasConstantValue()) {
- PHI->replaceAllUsesWith(V);
- PHI->eraseFromParent();
- }
}
@@ -664,5 +696,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Now we can remove the CalleeEntry block, which is now empty.
Caller->getBasicBlockList().erase(CalleeEntry);
+ // If we inserted a phi node, check to see if it has a single value (e.g. all
+ // the entries are the same or undef). If so, remove the PHI so it doesn't
+ // block other optimizations.
+ if (PHI)
+ if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ PHI->replaceAllUsesWith(V);
+ PHI->eraseFromParent();
+ }
+
return true;
}
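
The byval handling added above preserves a simple invariant: a byval callee owns a private copy of the aggregate, so inlining must materialize that copy in the caller (the new alloca plus the alignment-1 memcpy) before the inlined body runs. A minimal standalone C++ sketch of the invariant, using toy types rather than the LLVM API:

    struct Big { int data[16]; };

    // A byval callee owns its copy; its writes must not be visible to the caller.
    static void calleeByVal(Big B) { B.data[0] = 42; }

    // After inlining, HandleByValArgument keeps that behaviour by materializing
    // the copy in the caller (the new alloca plus memcpy) before the inlined body.
    static void callerInlined(Big &Arg) {
      Big Local = Arg;     // stands in for the alloca + memcpy
      Local.data[0] = 42;  // inlined body mutates only the copy
    }
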
diff --git a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index 5ca8299..45c15de 100644
--- a/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,9 @@ using namespace llvm;
namespace {
struct InstNamer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {}
+ InstNamer() : FunctionPass(ID) {
+ initializeInstNamerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -48,11 +50,10 @@ namespace {
};
char InstNamer::ID = 0;
- INITIALIZE_PASS(InstNamer, "instnamer",
- "Assign names to anonymous instructions", false, false);
}
-
+INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false)
char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
//
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 275b265..b2e5fa6 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,9 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(ID) {}
+ LCSSA() : LoopPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
// Cached analysis information for the current function.
DominatorTree *DT;
@@ -65,10 +67,7 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<ScalarEvolution>();
}
@@ -90,7 +89,10 @@ namespace {
}
char LCSSA::ID = 0;
-INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
char &llvm::LCSSAID = LCSSA::ID;
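
The pattern repeated across these pass files is the same: instead of relying on the INITIALIZE_PASS macro's static side effects alone, each pass constructor now asks the registry to initialize the pass, and INITIALIZE_PASS_BEGIN/DEPENDENCY/END spell out which analyses must be initialized first. A rough sketch of that idea with hypothetical names, not the real PassRegistry API:

    #include <functional>
    #include <map>
    #include <string>

    // Hypothetical stand-in for PassRegistry: remembers which passes are set up.
    struct ToyPassRegistry {
      static ToyPassRegistry &get() { static ToyPassRegistry R; return R; }
      std::map<std::string, bool> Initialized;
      void initializeOnce(const std::string &Name, const std::function<void()> &Deps) {
        if (Initialized[Name]) return;   // idempotent, like initializeXPass()
        Initialized[Name] = true;
        Deps();                          // initialize required analyses first
      }
    };

    static void initializeToyDomTreePass(ToyPassRegistry &R) {
      R.initializeOnce("domtree", [] {});
    }

    static void initializeToyLCSSAPass(ToyPassRegistry &R) {
      // Mirrors INITIALIZE_PASS_BEGIN/DEPENDENCY/END: dependencies come first.
      R.initializeOnce("lcssa", [&R] { initializeToyDomTreePass(R); });
    }

    struct ToyLCSSA {
      ToyLCSSA() { initializeToyLCSSAPass(ToyPassRegistry::get()); }  // eager, in the ctor
    };
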
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index 52f0499..063c76e 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -22,9 +22,11 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -66,9 +68,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
assert(BI->getParent() && "Terminator not inserted in block!");
OldDest->removePredecessor(BI->getParent());
- // Set the unconditional destination, and change the insn to be an
- // unconditional branch.
- BI->setUnconditionalDest(Destination);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Destination, BI);
+ BI->eraseFromParent();
return true;
}
@@ -81,8 +83,9 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
assert(BI->getParent() && "Terminator not inserted in block!");
Dest1->removePredecessor(BI->getParent());
- // Change a conditional branch to unconditional.
- BI->setUnconditionalDest(Dest1);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest1, BI);
+ BI->eraseFromParent();
return true;
}
return false;
@@ -209,9 +212,6 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// We don't want debug info removed by anything this general.
if (isa<DbgInfoIntrinsic>(I)) return false;
- // Likewise for memory use markers.
- if (isa<MemoryUseIntrinsic>(I)) return false;
-
if (!I->mayHaveSideEffects()) return true;
// Special case intrinsics that "may have side effects" but can be deleted
@@ -260,29 +260,45 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
return true;
}
+/// areAllUsesEqual - Check whether the uses of a value are all the same.
+/// This is similar to Instruction::hasOneUse() except this will also return
+/// true when there are multiple uses that all refer to the same value.
+static bool areAllUsesEqual(Instruction *I) {
+ Value::use_iterator UI = I->use_begin();
+ Value::use_iterator UE = I->use_end();
+ if (UI == UE)
+ return false;
+
+ User *TheUse = *UI;
+ for (++UI; UI != UE; ++UI) {
+ if (*UI != TheUse)
+ return false;
+ }
+ return true;
+}
+
/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
/// dead PHI node, due to being a def-use chain of single-use nodes that
/// either forms a cycle or is terminated by a trivially dead instruction,
/// delete it. If that makes any of its operands trivially dead, delete them
/// too, recursively. Return true if the PHI node is actually deleted.
-bool
-llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
+bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
// We can remove a PHI if it is on a cycle in the def-use graph
// where each node in the cycle has degree one, i.e. only one use,
// and is an instruction with no side effects.
- if (!PN->hasOneUse())
+ if (!areAllUsesEqual(PN))
return false;
bool Changed = false;
SmallPtrSet<PHINode *, 4> PHIs;
PHIs.insert(PN);
for (Instruction *J = cast<Instruction>(*PN->use_begin());
- J->hasOneUse() && !J->mayHaveSideEffects();
+ areAllUsesEqual(J) && !J->mayHaveSideEffects();
J = cast<Instruction>(*J->use_begin()))
// If we find a PHI more than once, we're on a cycle that
// won't prove fruitful.
if (PHINode *JP = dyn_cast<PHINode>(J))
- if (!PHIs.insert(cast<PHINode>(JP))) {
+ if (!PHIs.insert(JP)) {
// Break the cycle and delete the PHI and its operands.
JP->replaceAllUsesWith(UndefValue::get(JP->getType()));
(void)RecursivelyDeleteTriviallyDeadInstructions(JP);
@@ -346,13 +362,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
-
- Value *PNV = PN->hasConstantValue();
+
+ Value *PNV = SimplifyInstruction(PN, TD);
if (PNV == 0) continue;
-
+
// If we're able to simplify the phi to a single value, substitute the new
// value into all of its uses.
- assert(PNV != PN && "hasConstantValue broken");
+ assert(PNV != PN && "SimplifyInstruction broken!");
Value *OldPhiIt = PhiIt;
ReplaceAndSimplifyAllUses(PN, PNV, TD);
@@ -402,6 +418,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->replaceAllUsesWith(DestBB);
if (P) {
+ DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ if (DT) {
+ BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
+ DT->changeImmediateDominator(DestBB, PredBBIDom);
+ DT->eraseNode(PredBB);
+ }
ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
if (PI) {
PI->replaceAllUses(PredBB, DestBB);
@@ -645,3 +667,95 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
return Changed;
}
+
+/// enforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned enforceKnownAlignment(Value *V, unsigned Align,
+ unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (Operator::getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ return Align;
+ }
+ case Instruction::Alloca: {
+ AllocaInst *AI = cast<AllocaInst>(V);
+ // If there is a requested alignment and if this is an alloca, round up.
+ if (AI->getAlignment() >= PrefAlign)
+ return AI->getAlignment();
+ AI->setAlignment(PrefAlign);
+ return PrefAlign;
+ }
+ }
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // If there is a large requested alignment and we can, bump up the alignment
+ // of the global.
+ if (GV->isDeclaration()) return Align;
+
+ if (GV->getAlignment() >= PrefAlign)
+ return GV->getAlignment();
+ // We can only increase the alignment of the global if it has no alignment
+ // specified or if it is not assigned a section. If it is assigned a
+ // section, the global could be densely packed with other objects in the
+ // section; increasing the alignment could cause padding issues.
+ if (!GV->hasSection() || GV->getAlignment() == 0)
+ GV->setAlignment(PrefAlign);
+ return GV->getAlignment();
+ }
+
+ return Align;
+}
+
+/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
+ const TargetData *TD) {
+ assert(V->getType()->isPointerTy() &&
+ "getOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
+ if (PrefAlign > Align)
+ Align = enforceKnownAlignment(V, Align, PrefAlign);
+
+ // We don't need to make any adjustment.
+ return Align;
+}
+
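
getOrEnforceKnownAlignment above turns known-zero low bits into an alignment: if the low TrailZ bits of the pointer are provably zero, the pointer is at least 1 << TrailZ aligned. A self-contained sketch of just that arithmetic, assuming a plain known-zero bitmask instead of ComputeMaskedBits:

    #include <algorithm>
    #include <climits>
    #include <cstdint>

    // KnownZeroMask has a 1 for every bit proven to be zero in the pointer value.
    static unsigned alignmentFromKnownZeros(uint64_t KnownZeroMask) {
      unsigned TrailZ = 0;
      while (TrailZ < 64 && (KnownZeroMask & (1ULL << TrailZ)))
        ++TrailZ;                      // count trailing known-zero bits
      // Clamp absurd values (a null pointer "proves" every bit is zero).
      TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
      return 1u << TrailZ;
    }
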
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index b3c4801..2462630 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loopsimplify"
+#define DEBUG_TYPE "loop-simplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
@@ -46,9 +46,10 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -65,7 +66,9 @@ STATISTIC(NumNested , "Number of nested loops split out");
namespace {
struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(ID) {}
+ LoopSimplify() : LoopPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
@@ -87,8 +90,6 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- AU.addPreserved<DominanceFrontier>();
- AU.addPreservedID(LCSSAID);
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -107,8 +108,12 @@ namespace {
}
char LoopSimplify::ID = 0;
-INITIALIZE_PASS(LoopSimplify, "loopsimplify",
- "Canonicalize natural loops", true, false);
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
// Publically exposed interface to pass...
char &llvm::LoopSimplifyID = LoopSimplify::ID;
@@ -157,9 +162,8 @@ ReprocessLoop:
for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
- WriteAsOperand(dbgs(), *I, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
// Inform each successor of each dead pred.
for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
@@ -184,9 +188,8 @@ ReprocessLoop:
if (BI->isConditional()) {
if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
- WriteAsOperand(dbgs(), *I, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
BI->setCondition(ConstantInt::get(Cond->getType(),
!L->contains(BI->getSuccessor(0))));
@@ -262,8 +265,9 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = PN->hasConstantValue(DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
}
@@ -317,29 +321,22 @@ ReprocessLoop:
if (!FoldBranchToCommonDest(BI)) continue;
// Success. The block is now dead, so remove it from the loop,
- // update the dominator tree and dominance frontier, and delete it.
-
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
- WriteAsOperand(dbgs(), ExitingBlock, false);
- dbgs() << "\n");
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
DomTreeNode *Node = DT->getNode(ExitingBlock);
const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
Node->getChildren();
while (!Children.empty()) {
DomTreeNode *Child = Children.front();
DT->changeImmediateDominator(Child, Node->getIDom());
- if (DF) DF->changeImmediateDominator(Child->getBlock(),
- Node->getIDom()->getBlock(),
- DT);
}
DT->eraseNode(ExitingBlock);
- if (DF) DF->removeBlock(ExitingBlock);
BI->getSuccessor(0)->removePredecessor(ExitingBlock);
BI->getSuccessor(1)->removePredecessor(ExitingBlock);
@@ -378,9 +375,8 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
- WriteAsOperand(dbgs(), NewBB, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
+ << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -409,10 +405,8 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
LoopBlocks.size(), ".loopexit",
this);
- DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
- WriteAsOperand(dbgs(), NewBB, false);
- dbgs() << "\n");
-
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
+ << NewBB->getName() << "\n");
return NewBB;
}
@@ -438,11 +432,11 @@ static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
/// PHI node that tells us how to partition the loops.
static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA, LoopInfo *LI) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = PN->hasConstantValue(DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -516,7 +510,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// created.
///
Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
- PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+ PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -643,9 +637,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
Header->getName()+".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
- DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
- WriteAsOperand(dbgs(), BEBlock, false);
- dbgs() << "\n");
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+ << BEBlock->getName() << "\n");
// Move the new backedge block to right after the last backedge block.
Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
@@ -721,8 +714,6 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
// Update dominator information
DT->splitBlock(BEBlock);
- if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
- DF->splitBlock(BEBlock);
return BEBlock;
}
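
Several hunks above replace PN->hasConstantValue() with SimplifyInstruction(PN, ...); the common case both catch is the degenerate PHI whose incoming entries are all the same value or undef, which can be replaced outright. A toy model of that rule, assuming integer values with std::nullopt standing in for undef:

    #include <optional>
    #include <vector>

    // Returns the single merged value if the phi is degenerate, nullopt otherwise.
    static std::optional<int>
    simplifyDegeneratePhi(const std::vector<std::optional<int>> &Incoming) {
      std::optional<int> Common;
      for (const std::optional<int> &V : Incoming) {
        if (!V)
          continue;                      // undef entries don't constrain the result
        if (Common && *Common != *V)
          return std::nullopt;           // genuinely merges two different values
        Common = V;
      }
      return Common;                     // all entries equal (or all undef)
    }
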
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 236bbe9..7da7271 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,13 +16,14 @@
//
// The process of unrolling can produce extraneous basic blocks linked with
// unconditional branches. This will be corrected in the future.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/BasicBlock.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
@@ -30,20 +31,19 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-
using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
-STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
static inline void RemapInstruction(Instruction *I,
- ValueMap<const Value *, Value*> &VMap) {
+ ValueToValueMapTy &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ ValueToValueMapTy::iterator It = VMap.find(Op);
if (It != VMap.end())
I->setOperand(op, It->second);
}
@@ -96,7 +96,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
}
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
-/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
+/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
@@ -105,7 +105,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
-bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
+bool llvm::UnrollLoop(Loop *L, unsigned Count,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -127,6 +128,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
" Can't unroll; loop not terminated by a conditional branch.\n");
return false;
}
+
+ if (Header->hasAddressTaken()) {
+ // The loop-rotate pass can be helpful to avoid this in many cases.
+ DEBUG(dbgs() <<
+ " Won't unroll loop: address of header block is taken.\n");
+ return false;
+ }
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
@@ -189,7 +197,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -274,7 +281,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
for (unsigned i = 0; i < NewBlocks.size(); ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, LastValueMap);
+ ::RemapInstruction(I, LastValueMap);
}
// The latch block exits the loop. If there are any PHI nodes in the
@@ -342,7 +349,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
- Term->setUnconditionalDest(Dest);
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+ Term->eraseFromParent();
// Merge adjacent basic blocks, if possible.
if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
@@ -362,10 +371,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst)) {
- Inst->replaceAllUsesWith(C);
- (*BB)->getInstList().erase(Inst);
- }
+ else if (Value *V = SimplifyInstruction(Inst))
+ if (LI->replacementPreservesLCSSAForm(Inst, V)) {
+ Inst->replaceAllUsesWith(V);
+ (*BB)->getInstList().erase(Inst);
+ }
}
NumCompletelyUnrolled += CompletelyUnroll;
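
RemapInstruction above is what keeps each unrolled copy self-consistent: any operand still referring to an original value is redirected to its clone through the value map, and unmapped operands are left alone. A standalone sketch with a toy instruction type rather than the LLVM classes:

    #include <map>
    #include <vector>

    struct ToyInst {
      std::vector<ToyInst *> Operands;
    };

    // Redirect operands of I through VMap, leaving unmapped operands untouched.
    static void remapOperands(ToyInst &I,
                              const std::map<ToyInst *, ToyInst *> &VMap) {
      for (ToyInst *&Op : I.Operands) {
        std::map<ToyInst *, ToyInst *>::const_iterator It = VMap.find(Op);
        if (It != VMap.end())
          Op = It->second;               // use the value cloned for this iteration
      }
    }
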
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index a46dd84..025ae0d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -79,7 +79,9 @@ namespace {
explicit LowerInvoke(const TargetLowering *tli = NULL,
bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- TLI(tli) { }
+ TLI(tli) {
+ initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -102,7 +104,7 @@ namespace {
char LowerInvoke::ID = 0;
INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
"Lower invoke and unwind, for unwindless code generators",
- false, false);
+ false, false)
char &llvm::LowerInvokePassID = LowerInvoke::ID;
@@ -148,19 +150,20 @@ bool LowerInvoke::doInitialization(Module &M) {
"llvm.sjljeh.jblist");
}
-// VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>,
-// so it looks like Intrinsic::_setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)
-// let's return it to _setjmp state in case anyone ever needs it after this
-// point under VisualStudio
-#define setjmp _setjmp
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
#endif
LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
@@ -186,6 +189,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Insert an unconditional branch to the normal destination.
@@ -266,6 +270,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
II->replaceAllUsesWith(NewCall);
// Replace the invoke with an uncond branch.
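
The revised MSVC workaround above uses push_macro/pop_macro rather than re-defining setjmp by hand: the pragma saves the macro, the code uses the plain identifier, and the original definition is restored afterwards. In isolation the idiom looks roughly like this (compiler support for these pragmas is assumed):

    #define setjmp _setjmp            // pretend <setjmp.h> mapped it, as MSVC can

    #pragma push_macro("setjmp")      // save the current definition
    #undef setjmp
    // With the macro gone, "setjmp" works as a plain identifier again, which is
    // what the Intrinsic::setjmp reference above needs.
    namespace ToyIntrinsic { enum ID { setjmp, longjmp }; }
    static ToyIntrinsic::ID Picked = ToyIntrinsic::setjmp;
    #pragma pop_macro("setjmp")       // the saved #define is back in force afterwards
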
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 5530b47..914a439 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -33,7 +33,9 @@ namespace {
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSwitch() : FunctionPass(ID) {}
+ LowerSwitch() : FunctionPass(ID) {
+ initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
@@ -80,7 +82,7 @@ namespace {
char LowerSwitch::ID = 0;
INITIALIZE_PASS(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false);
+ "Lower SwitchInst's to branches", false, false)
// Publically exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
@@ -107,7 +109,8 @@ bool LowerSwitch::runOnFunction(Function &F) {
// operator<< - Used for debugging purposes.
//
static raw_ostream& operator<<(raw_ostream &O,
- const LowerSwitch::CaseVector &C) ATTRIBUTE_USED;
+ const LowerSwitch::CaseVector &C)
+ LLVM_ATTRIBUTE_USED;
static raw_ostream& operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 101645b..f4ca81a 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -27,18 +27,17 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(ID) {}
+ PromotePass() : FunctionPass(ID) {
+ initializePromotePassPass(*PassRegistry::getPassRegistry());
+ }
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
//
virtual bool runOnFunction(Function &F);
- // getAnalysisUsage - We need dominance frontiers
- //
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
@@ -49,8 +48,11 @@ namespace {
} // end of anonymous namespace
char PromotePass::ID = 0;
-INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
- false, false);
+INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false)
bool PromotePass::runOnFunction(Function &F) {
std::vector<AllocaInst*> Allocas;
@@ -60,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
- DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
while (1) {
Allocas.clear();
@@ -74,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DF);
+ PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size();
Changed = true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a4e3029..e6a4373 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -9,10 +9,19 @@
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
-// transformed by using dominator frontiers to place PHI nodes, then traversing
-// the function in depth-first order to rewrite loads and stores as appropriate.
-// This is just the standard SSA construction algorithm to construct "pruned"
-// SSA form.
+// transformed by using iterated dominator frontiers to place PHI nodes, then
+// traversing the function in depth-first order to rewrite loads and stores as
+// appropriate.
+//
+// The algorithm used here is based on:
+//
+// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+// Programming Languages
+// POPL '95. ACM, New York, NY, 62-73.
+//
+// It has been modified to not explicitly use the DJ graph data structure and to
+// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//
@@ -24,9 +33,10 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +44,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
+#include <map>
+#include <queue>
using namespace llvm;
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
@@ -178,7 +190,6 @@ namespace {
///
std::vector<AllocaInst*> Allocas;
DominatorTree &DT;
- DominanceFrontier &DF;
DIFactory *DIF;
/// AST - An AliasSetTracker object to update. If null, don't update it.
@@ -187,7 +198,7 @@ namespace {
/// AllocaLookup - Reverse mapping of Allocas.
///
- std::map<AllocaInst*, unsigned> AllocaLookup;
+ DenseMap<AllocaInst*, unsigned> AllocaLookup;
/// NewPhiNodes - The PhiNodes we're adding.
///
@@ -216,12 +227,15 @@ namespace {
/// non-determinstic behavior.
DenseMap<BasicBlock*, unsigned> BBNumbers;
+ /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
+ DenseMap<DomTreeNode*, unsigned> DomLevels;
+
/// BBNumPreds - Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
- DominanceFrontier &df, AliasSetTracker *ast)
- : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {}
+ AliasSetTracker *ast)
+ : Allocas(A), DT(dt), DIF(0), AST(ast) {}
~PromoteMem2Reg() {
delete DIF;
}
@@ -264,13 +278,12 @@ namespace {
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
std::vector<RenamePassData> &Worklist);
- bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
- SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+ bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
struct AllocaInfo {
- std::vector<BasicBlock*> DefiningBlocks;
- std::vector<BasicBlock*> UsingBlocks;
+ SmallVector<BasicBlock*, 32> DefiningBlocks;
+ SmallVector<BasicBlock*, 32> UsingBlocks;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
@@ -325,11 +338,19 @@ namespace {
DbgDeclare = FindAllocaDbgDeclare(AI);
}
};
+
+ typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
+
+ struct DomTreeNodeCompare {
+ bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
+ return LHS.second < RHS.second;
+ }
+ };
} // end of anonymous namespace
void PromoteMem2Reg::run() {
- Function &F = *DF.getRoot()->getParent();
+ Function &F = *DT.getRoot()->getParent();
if (AST) PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
@@ -422,7 +443,26 @@ void PromoteMem2Reg::run() {
continue;
}
}
-
+
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ SmallVector<DomTreeNode*, 32> Worklist;
+
+ DomTreeNode *Root = DT.getRootNode();
+ DomLevels[Root] = 0;
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ unsigned ChildLevel = DomLevels[Node] + 1;
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
+ CI != CE; ++CI) {
+ DomLevels[*CI] = ChildLevel;
+ Worklist.push_back(*CI);
+ }
+ }
+ }
+
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
@@ -484,9 +524,8 @@ void PromoteMem2Reg::run() {
Instruction *A = Allocas[i];
// If there are any uses of the alloca instructions left, they must be in
- // sections of dead code that were not processed on the dominance frontier.
- // Just delete the users now.
- //
+ // unreachable basic blocks that were not processed by walking the dominator
+ // tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
if (AST) AST->deleteValue(A);
@@ -509,9 +548,9 @@ void PromoteMem2Reg::run() {
for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
PHINode *PN = I->second;
-
+
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = PN->hasConstantValue(&DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -663,7 +702,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) {
-
// Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock*, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
@@ -673,47 +711,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
- // Compute the locations where PhiNodes need to be inserted. Look at the
- // dominance frontier of EACH basic-block we have a write in.
- unsigned CurrentVersion = 0;
- SmallPtrSet<PHINode*, 16> InsertedPHINodes;
- std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
- while (!Info.DefiningBlocks.empty()) {
- BasicBlock *BB = Info.DefiningBlocks.back();
- Info.DefiningBlocks.pop_back();
-
- // Look up the DF for this write, add it to defining blocks.
- DominanceFrontier::const_iterator it = DF.find(BB);
- if (it == DF.end()) continue;
-
- const DominanceFrontier::DomSetType &S = it->second;
-
- // In theory we don't need the indirection through the DFBlocks vector.
- // In practice, the order of calling QueuePhiNode would depend on the
- // (unspecified) ordering of basic blocks in the dominance frontier,
- // which would give PHI nodes non-determinstic subscripts. Fix this by
- // processing blocks in order of the occurance in the function.
- for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
- PE = S.end(); P != PE; ++P) {
- // If the frontier block is not in the live-in set for the alloca, don't
- // bother processing it.
- if (!LiveInBlocks.count(*P))
- continue;
-
- DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
- }
-
- // Sort by which the block ordering in the function.
- if (DFBlocks.size() > 1)
- std::sort(DFBlocks.begin(), DFBlocks.end());
-
- for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
- BasicBlock *BB = DFBlocks[i].second;
- if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
- Info.DefiningBlocks.push_back(BB);
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ DomTreeNodeCompare> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
+ E = DefBlocks.end(); I != E; ++I) {
+ if (DomTreeNode *Node = DT.getNode(*I))
+ PQ.push(std::make_pair(Node, DomLevels[Node]));
+ }
+
+ SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
+ SmallPtrSet<DomTreeNode*, 32> Visited;
+ SmallVector<DomTreeNode*, 32> Worklist;
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
+ ++SI) {
+ DomTreeNode *SuccNode = DT.getNode(*SI);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels[SuccNode];
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!Visited.insert(SuccNode))
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (!LiveInBlocks.count(SuccBB))
+ continue;
+
+ DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
+ if (!DefBlocks.count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
+ ++CI) {
+ if (!Visited.count(*CI))
+ Worklist.push_back(*CI);
+ }
}
- DFBlocks.clear();
}
+
+ if (DFBlocks.size() > 1)
+ std::sort(DFBlocks.begin(), DFBlocks.end());
+
+ unsigned CurrentVersion = 0;
+ for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
+ QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
/// RewriteSingleStoreAlloca - If there is only a single store to this value,
@@ -900,8 +969,7 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// Alloca returns true if there wasn't already a phi-node for that variable
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
- unsigned &Version,
- SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+ unsigned &Version) {
// Look up the basic-block in question.
PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
@@ -916,8 +984,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
PN->reserveOperandSpace(getNumPreds(BB));
-
- InsertedPHINodes.insert(PN);
if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
@@ -986,7 +1052,7 @@ NextIteration:
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
if (!Src) continue;
- std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
+ DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
if (AI == AllocaLookup.end()) continue;
Value *V = IncomingVals[AI->second];
@@ -1002,7 +1068,7 @@ NextIteration:
AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
if (!Dest) continue;
- std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
+ DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
if (ai == AllocaLookup.end())
continue;
@@ -1036,18 +1102,17 @@ NextIteration:
}
/// PromoteMemToReg - Promote the specified list of alloca instructions into
-/// scalar registers, inserting PHI nodes as appropriate. This function makes
-/// use of DominanceFrontier information. This function does not modify the CFG
-/// of the function at all. All allocas must be from the same function.
+/// scalar registers, inserting PHI nodes as appropriate. This function does
+/// not modify the CFG of the function at all. All allocas must be from the
+/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
- DominatorTree &DT, DominanceFrontier &DF,
- AliasSetTracker *AST) {
+ DominatorTree &DT, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
- PromoteMem2Reg(Allocas, DT, DF, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST).run();
}
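
The PHI-placement rewrite above computes the iterated dominance frontier directly from the dominator tree, processing definition blocks from the deepest tree level upward, instead of consulting precomputed DominanceFrontier sets. A standalone sketch of the same walk on a toy graph, with the live-in pruning omitted and the node layout assumed:

    #include <queue>
    #include <set>
    #include <vector>

    struct ToyNode {
      unsigned Level;                      // depth in the dominator tree
      ToyNode *IDom;                       // immediate dominator
      std::vector<ToyNode *> DomChildren;  // dominator-tree children
      std::vector<ToyNode *> CFGSuccs;     // CFG successors
    };

    // Iterated dominance frontier of DefBlocks, processed deepest level first.
    static std::set<ToyNode *> computeIDF(const std::set<ToyNode *> &DefBlocks) {
      auto Cmp = [](ToyNode *A, ToyNode *B) { return A->Level < B->Level; };
      std::priority_queue<ToyNode *, std::vector<ToyNode *>, decltype(Cmp)> PQ(Cmp);
      for (ToyNode *D : DefBlocks)
        PQ.push(D);

      std::set<ToyNode *> IDF, Visited;
      while (!PQ.empty()) {
        ToyNode *Root = PQ.top();
        PQ.pop();

        std::vector<ToyNode *> Worklist(1, Root);
        while (!Worklist.empty()) {
          ToyNode *N = Worklist.back();
          Worklist.pop_back();
          for (ToyNode *Succ : N->CFGSuccs) {
            if (Succ->IDom == N)
              continue;                        // dominator-tree edge, skip
            if (Succ->Level > Root->Level)
              continue;                        // still dominated by Root
            if (!Visited.insert(Succ).second)
              continue;                        // already handled
            IDF.insert(Succ);                  // a PHI is needed here
            if (!DefBlocks.count(Succ))
              PQ.push(Succ);                   // the PHI acts as a new definition
          }
          for (ToyNode *Child : N->DomChildren)
            if (!Visited.count(Child))
              Worklist.push_back(Child);
        }
      }
      return IDF;
    }
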
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index c855988..3896d98 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "ssaupdater"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
@@ -178,9 +179,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
- if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
+ if (Value *V = SimplifyInstruction(InsertedPHI)) {
InsertedPHI->eraseFromParent();
- return ConstVal;
+ return V;
}
// If the client wants to know about all new instructions, tell it.
@@ -342,3 +343,169 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
return Impl.GetValue(BB);
}
+
+//===----------------------------------------------------------------------===//
+// LoadAndStorePromoter Implementation
+//===----------------------------------------------------------------------===//
+
+LoadAndStorePromoter::
+LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
+ if (Insts.empty()) return;
+
+ Value *SomeVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Insts[0]))
+ SomeVal = LI;
+ else
+ SomeVal = cast<StoreInst>(Insts[0])->getOperand(0);
+
+ if (BaseName.empty())
+ BaseName = SomeVal->getName();
+ SSA.Initialize(SomeVal->getType(), BaseName);
+}
+
+
+void LoadAndStorePromoter::
+run(const SmallVectorImpl<Instruction*> &Insts) const {
+
+ // First step: bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the function with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ // Walk the uses in the use-list order to be deterministic.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+ BasicBlock *BB = User->getParent();
+ std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (StoreInst *SI = dyn_cast<StoreInst>(User))
+ SSA.AddAvailableValue(BB, SI->getOperand(0));
+ else
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, check to see if this block is all loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ // If so, we can queue them all as live in loads. We don't have an
+ // efficient way to tell which one is first in the block and don't want to
+ // scan large blocks, so just add all loads as live ins.
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
+
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!isInstInList(L, Insts)) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ replaceLoadWithValue(L, StoredValue);
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!isInstInList(S, Insts)) continue;
+
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ replaceLoadWithValue(ALoad, NewVal);
+
+ // Avoid assertions in unreachable code.
+ if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
+ ALoad->replaceAllUsesWith(NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // Allow the client to do stuff before we start nuking things.
+ doExtraRewritesBeforeFinalDeletion();
+
+ // Now that everything is rewritten, delete the old instructions from the
+ // function. They should all be dead now.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
+ Instruction *User = Insts[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ replaceLoadWithValue(cast<LoadInst>(User), NewVal);
+ User->replaceAllUsesWith(NewVal);
+ }
+
+ instructionDeleted(User);
+ User->eraseFromParent();
+ }
+}
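
The block-local scan in LoadAndStorePromoter::run follows a simple rule: a load takes the value of the most recent store in its block, loads seen before any store are live-in and get their value from SSAUpdater, and the last store is the block's live-out. A toy, self-contained version of that scan, with a std::variant of loads and stores standing in for the instruction stream:

    #include <optional>
    #include <string>
    #include <variant>
    #include <vector>

    struct ToyLoad  { std::string Name; };
    struct ToyStore { int Value; };
    using ToyMemOp = std::variant<ToyLoad, ToyStore>;

    struct BlockScan {
      std::vector<std::string> LiveInLoads;  // need a cross-block value
      std::optional<int> LiveOut;            // last stored value, if any
    };

    static BlockScan scanBlock(const std::vector<ToyMemOp> &Ops) {
      BlockScan Result;
      std::optional<int> Current;            // value of the last store seen
      for (const ToyMemOp &Op : Ops) {
        if (const ToyStore *S = std::get_if<ToyStore>(&Op)) {
          Current = S->Value;                // becomes the active value
        } else if (!Current) {
          Result.LiveInLoads.push_back(std::get<ToyLoad>(Op).Name);
        }
        // A load after a store is simply replaced with Current.
      }
      Result.LiveOut = Current;
      return Result;
    }
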
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 28d7afb..fb660db 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,33 +19,34 @@
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <functional>
#include <set>
#include <map>
using namespace llvm;
+static cl::opt<bool>
+DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
+ cl::desc("Duplicate return instructions into unconditional branches"));
+
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
class SimplifyCFGOpt {
const TargetData *const TD;
- ConstantInt *GetConstantInt(Value *V);
- Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values);
- Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values);
- bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
- std::vector<ConstantInt*> &Values);
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases);
@@ -53,6 +54,14 @@ class SimplifyCFGOpt {
BasicBlock *Pred);
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI);
+ bool SimplifyReturn(ReturnInst *RI);
+ bool SimplifyUnwind(UnwindInst *UI);
+ bool SimplifyUnreachable(UnreachableInst *UI);
+ bool SimplifySwitch(SwitchInst *SI);
+ bool SimplifyIndirectBr(IndirectBrInst *IBI);
+ bool SimplifyUncondBranch(BranchInst *BI);
+ bool SimplifyCondBranch(BranchInst *BI);
+
public:
explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {}
bool run(BasicBlock *BB);
@@ -91,8 +100,6 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
/// ExistPred, an existing predecessor of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
BasicBlock *ExistPred) {
- assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
- succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
PHINode *PN;
@@ -102,28 +109,29 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and
-/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
+/// least one PHI node in it), check to see if the merge at this block is due
/// to an "if condition". If so, return the boolean condition that determines
/// which entry into BB will be taken. Also, return by references the block
/// that will be entered from if the condition is true, and the block that will
/// be entered if the condition is false.
///
-///
-static Value *GetIfCondition(BasicBlock *BB,
- BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
- assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+/// This does no checking to see if the true/false blocks have large or unsavory
+/// instructions in them.
+static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse) {
+ PHINode *SomePHI = cast<PHINode>(BB->begin());
+ assert(SomePHI->getNumIncomingValues() == 2 &&
"Function can only handle blocks with 2 predecessors!");
- BasicBlock *Pred1 = *pred_begin(BB);
- BasicBlock *Pred2 = *++pred_begin(BB);
+ BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
+ BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
// We can only handle branches. Other control flow will be lowered to
// branches if possible anyway.
- if (!isa<BranchInst>(Pred1->getTerminator()) ||
- !isa<BranchInst>(Pred2->getTerminator()))
+ BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
+ BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
+ if (Pred1Br == 0 || Pred2Br == 0)
return 0;
- BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
- BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
// Eliminate code duplication by ensuring that Pred1Br is conditional if
// either are.
@@ -140,6 +148,12 @@ static Value *GetIfCondition(BasicBlock *BB,
}
if (Pred1Br->isConditional()) {
+ // The only thing we have to watch out for here is to make sure that Pred2
+ // doesn't have incoming edges from other blocks. If it does, the condition
+ // doesn't dominate BB.
+ if (Pred2->getSinglePredecessor() == 0)
+ return 0;
+
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
if (Pred1Br->getSuccessor(0) == BB &&
@@ -156,39 +170,29 @@ static Value *GetIfCondition(BasicBlock *BB,
return 0;
}
- // The only thing we have to watch out for here is to make sure that Pred2
- // doesn't have incoming edges from other blocks. If it does, the condition
- // doesn't dominate BB.
- if (++pred_begin(Pred2) != pred_end(Pred2))
- return 0;
-
return Pred1Br->getCondition();
}
// Ok, if we got here, both predecessors end with an unconditional branch to
// BB. Don't panic! If both blocks only have a single (identical)
// predecessor, and THAT is a conditional branch, then we're all ok!
- if (pred_begin(Pred1) == pred_end(Pred1) ||
- ++pred_begin(Pred1) != pred_end(Pred1) ||
- pred_begin(Pred2) == pred_end(Pred2) ||
- ++pred_begin(Pred2) != pred_end(Pred2) ||
- *pred_begin(Pred1) != *pred_begin(Pred2))
+ BasicBlock *CommonPred = Pred1->getSinglePredecessor();
+ if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
return 0;
// Otherwise, if this is a conditional branch, then we can use it!
- BasicBlock *CommonPred = *pred_begin(Pred1);
- if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
- assert(BI->isConditional() && "Two successors but not conditional?");
- if (BI->getSuccessor(0) == Pred1) {
- IfTrue = Pred1;
- IfFalse = Pred2;
- } else {
- IfTrue = Pred2;
- IfFalse = Pred1;
- }
- return BI->getCondition();
+ BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
+ if (BI == 0) return 0;
+
+ assert(BI->isConditional() && "Two successors but not conditional?");
+ if (BI->getSuccessor(0) == Pred1) {
+ IfTrue = Pred1;
+ IfFalse = Pred2;
+ } else {
+ IfTrue = Pred2;
+ IfFalse = Pred1;
}
- return 0;
+ return BI->getCondition();
}
/// DominatesMergePoint - If we have a merge point of an "if condition" as
@@ -201,7 +205,7 @@ static Value *GetIfCondition(BasicBlock *BB,
/// non-trapping. If both are true, the instruction is inserted into the set
/// and true is returned.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
- std::set<Instruction*> *AggressiveInsts) {
+ SmallPtrSet<Instruction*, 4> *AggressiveInsts) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -219,56 +223,55 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// If this instruction is defined in a block that contains an unconditional
// branch to BB, then it must be in the 'conditional' part of the "if
- // statement".
- if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
- if (!AggressiveInsts) return false;
- // Okay, it looks like the instruction IS in the "condition". Check to
- // see if it's a cheap instruction to unconditionally compute, and if it
- // only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
- return false;
+ // statement". If not, it definitely dominates the region.
+ BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ return true;
- switch (I->getOpcode()) {
- default: return false; // Cannot hoist this out safely.
- case Instruction::Load: {
- // We have to check to make sure there are no instructions before the
- // load in its basic block, as we are going to hoist the loop out to
- // its predecessor.
- BasicBlock::iterator IP = PBB->begin();
- while (isa<DbgInfoIntrinsic>(IP))
- IP++;
- if (IP != BasicBlock::iterator(I))
- return false;
- break;
- }
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- break; // These are all cheap and non-trapping instructions.
- }
+ // If we aren't allowing aggressive promotion anymore, then don't consider
+ // instructions in the 'if region'.
+ if (AggressiveInsts == 0) return false;
+
+ // Okay, it looks like the instruction IS in the "condition". Check to
+ // see if it's a cheap instruction to unconditionally compute, and if it
+ // only uses stuff defined outside of the condition. If so, hoist it out.
+ if (!I->isSafeToSpeculativelyExecute())
+ return false;
- // Okay, we can only really hoist these out if their operands are not
- // defined in the conditional region.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, 0))
- return false;
- // Okay, it's safe to do this! Remember this instruction.
- AggressiveInsts->insert(I);
- }
+ switch (I->getOpcode()) {
+ default: return false; // Cannot hoist this out safely.
+ case Instruction::Load:
+ // We have to check to make sure there are no instructions before the
+ // load in its basic block, as we are going to hoist the load out to its
+ // predecessor.
+ if (PBB->getFirstNonPHIOrDbg() != I)
+ return false;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ break; // These are all cheap and non-trapping instructions.
+ }
+ // Okay, we can only really hoist these out if their operands are not
+ // defined in the conditional region.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+ if (!DominatesMergePoint(*i, BB, 0))
+ return false;
+ // Okay, it's safe to do this! Remember this instruction.
+ AggressiveInsts->insert(I);
return true;
}
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
+static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
@@ -296,77 +299,94 @@ ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
return 0;
}
-/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
-/// icmp_eq instructions that compare a value against a constant, return the
-/// value being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) {
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Inst->getOpcode() == Instruction::ICmp &&
- cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
- if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
- Values.push_back(C);
- return Inst->getOperand(0);
- } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
- Values.push_back(C);
- return Inst->getOperand(1);
+/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
+/// collection of icmp eq/ne instructions that compare a value against a
+/// constant, return the value being compared, and stick the constant into the
+/// Values vector.
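+///
+/// For example, given "x == 1 || x == 4 || x == 9" with isEQ set, this returns
+/// x and fills Vals with {1, 4, 9}. A single operand that cannot be folded may
+/// be passed back through Extra rather than failing the whole match.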
+static Value *
+GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
+ const TargetData *TD, bool isEQ, unsigned &UsedICmps) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return 0;
+
+ // If this is an icmp against a constant, handle this as one of the cases.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ:ICmpInst::ICMP_NE)) {
+ UsedICmps++;
+ Vals.push_back(C);
+ return I->getOperand(0);
}
- } else if (Inst->getOpcode() == Instruction::Or) {
- if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
- if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
- if (LHS == RHS)
- return LHS;
+
+ // If we have an "x ult 3" comparison, for example, then we can add 0,1,2 to
+ // the set.
+ ConstantRange Span =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue());
+
+ // If this is an and/!= check, then we want to optimize "x ugt 2" into
+ // x != 0 && x != 1 && x != 2.
+ if (!isEQ)
+ Span = Span.inverse();
+
+ // If there are a ton of values, we don't want to make a ginormous switch.
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
+ // We don't handle wrapped sets yet.
+ Span.isWrappedSet())
+ return 0;
+
+ for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
+ Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
+ UsedICmps++;
+ return I->getOperand(0);
}
+ return 0;
}
- return 0;
-}
+
+ // Otherwise, we can only handle an | or &, depending on isEQ.
+ if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
+ return 0;
+
+ unsigned NumValsBeforeLHS = Vals.size();
+ unsigned UsedICmpsBeforeLHS = UsedICmps;
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ unsigned NumVals = Vals.size();
+ unsigned UsedICmpsBeforeRHS = UsedICmps;
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps)) {
+ if (LHS == RHS)
+ return LHS;
+ Vals.resize(NumVals);
+ UsedICmps = UsedICmpsBeforeRHS;
+ }
-/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
-/// setne instructions that compare a value against a constant, return the value
-/// being compared, and stick the constant into the Values vector.
-Value *SimplifyCFGOpt::
-GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) {
- if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Inst->getOpcode() == Instruction::ICmp &&
- cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
- if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) {
- Values.push_back(C);
- return Inst->getOperand(0);
- } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) {
- Values.push_back(C);
- return Inst->getOperand(1);
- }
- } else if (Inst->getOpcode() == Instruction::And) {
- if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
- if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
- if (LHS == RHS)
- return LHS;
+ // The RHS of the or/and can't be folded in. If we haven't used "Extra" yet,
+ // set it to the RHS and return success.
+ if (Extra == 0 || Extra == I->getOperand(1)) {
+ Extra = I->getOperand(1);
+ return LHS;
}
+
+ Vals.resize(NumValsBeforeLHS);
+ UsedICmps = UsedICmpsBeforeLHS;
+ return 0;
}
- return 0;
-}
-
-/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
-/// bunch of comparisons of one value against constants, return the value and
-/// the constants being compared.
-bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal,
- std::vector<ConstantInt*> &Values) {
- if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantSetEQs(Cond, Values);
-
- // Return true to indicate that the condition is true if the CompVal is
- // equal to one of the constants.
- return true;
- } else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantSetNEs(Cond, Values);
-
- // Return false to indicate that the condition is false if the CompVal is
- // equal to one of the constants.
- return false;
+
+ // If the LHS can't be folded in, but Extra is available and RHS can, try to
+ // use LHS as Extra.
+ if (Extra == 0 || Extra == I->getOperand(0)) {
+ Value *OldExtra = Extra;
+ Extra = I->getOperand(0);
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ isEQ, UsedICmps))
+ return RHS;
+ assert(Vals.size() == NumValsBeforeLHS);
+ Extra = OldExtra;
}
- return false;
+
+ return 0;
}
-
+
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
Instruction* Cond = 0;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -374,6 +394,8 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
+ } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
+ Cond = dyn_cast<Instruction>(IBI->getAddress());
}
TI->eraseFromParent();
@@ -395,7 +417,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
if ((ICI->getPredicate() == ICmpInst::ICMP_EQ ||
ICI->getPredicate() == ICmpInst::ICMP_NE) &&
- GetConstantInt(ICI->getOperand(1)))
+ GetConstantInt(ICI->getOperand(1), TD))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
@@ -420,7 +442,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
BranchInst *BI = cast<BranchInst>(TI);
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
- Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)),
+ Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD),
BI->getSuccessor(ICI->getPredicate() ==
ICmpInst::ICMP_NE)));
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
@@ -459,8 +481,8 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
}
// Otherwise, just sort both lists and compare element by element.
- std::sort(V1->begin(), V1->end());
- std::sort(V2->begin(), V2->end());
+ array_pod_sort(V1->begin(), V1->end());
+ array_pod_sort(V2->begin(), V2->end());
unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
while (i1 != e1 && i2 != e2) {
if ((*V1)[i1].first == (*V2)[i2].first)
@@ -506,90 +528,87 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// If we are here, we know that the value is none of those cases listed in
// PredCases. If there are any cases in ThisCases that are in PredCases, we
// can simplify TI.
- if (ValuesOverlap(PredCases, ThisCases)) {
- if (isa<BranchInst>(TI)) {
- // Okay, one of the successors of this condbr is dead. Convert it to a
- // uncond br.
- assert(ThisCases.size() == 1 && "Branch can only have one case!");
- // Insert the new branch.
- Instruction *NI = BranchInst::Create(ThisDef, TI);
- (void) NI;
-
- // Remove PHI node entries for the dead edge.
- ThisCases[0].second->removePredecessor(TI->getParent());
-
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
-
- EraseTerminatorInstAndDCECond(TI);
- return true;
-
- } else {
- SwitchInst *SI = cast<SwitchInst>(TI);
- // Okay, TI has cases that are statically dead, prune them away.
- SmallPtrSet<Constant*, 16> DeadCases;
- for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- DeadCases.insert(PredCases[i].first);
+ if (!ValuesOverlap(PredCases, ThisCases))
+ return false;
+
+ if (isa<BranchInst>(TI)) {
+ // Okay, one of the successors of this condbr is dead. Convert it to an
+ // uncond br.
+ assert(ThisCases.size() == 1 && "Branch can only have one case!");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(ThisDef, TI);
+ (void) NI;
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI);
+ // Remove PHI node entries for the dead edge.
+ ThisCases[0].second->removePredecessor(TI->getParent());
- for (unsigned i = SI->getNumCases()-1; i != 0; --i)
- if (DeadCases.count(SI->getCaseValue(i))) {
- SI->getSuccessor(i)->removePredecessor(TI->getParent());
- SI->removeCase(i);
- }
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
- DEBUG(dbgs() << "Leaving: " << *TI << "\n");
- return true;
- }
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
}
-
- } else {
- // Otherwise, TI's block must correspond to some matched value. Find out
- // which value (or set of values) this is.
- ConstantInt *TIV = 0;
- BasicBlock *TIBB = TI->getParent();
+
+ SwitchInst *SI = cast<SwitchInst>(TI);
+ // Okay, TI has cases that are statically dead, prune them away.
+ SmallPtrSet<Constant*, 16> DeadCases;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
- if (PredCases[i].second == TIBB) {
- if (TIV == 0)
- TIV = PredCases[i].first;
- else
- return false; // Cannot handle multiple values coming to this block.
- }
- assert(TIV && "No edge from pred to succ?");
-
- // Okay, we found the one constant that our value can be if we get into TI's
- // BB. Find out which successor will unconditionally be branched to.
- BasicBlock *TheRealDest = 0;
- for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
- if (ThisCases[i].first == TIV) {
- TheRealDest = ThisCases[i].second;
- break;
+ DeadCases.insert(PredCases[i].first);
+
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI);
+
+ for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+ if (DeadCases.count(SI->getCaseValue(i))) {
+ SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ SI->removeCase(i);
}
- // If not handled by any explicit cases, it is handled by the default case.
- if (TheRealDest == 0) TheRealDest = ThisDef;
+ DEBUG(dbgs() << "Leaving: " << *TI << "\n");
+ return true;
+ }
+
+ // Otherwise, TI's block must correspond to some matched value. Find out
+ // which value (or set of values) this is.
+ ConstantInt *TIV = 0;
+ BasicBlock *TIBB = TI->getParent();
+ for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+ if (PredCases[i].second == TIBB) {
+ if (TIV != 0)
+ return false; // Cannot handle multiple values coming to this block.
+ TIV = PredCases[i].first;
+ }
+ assert(TIV && "No edge from pred to succ?");
+
+ // Okay, we found the one constant that our value can be if we get into TI's
+ // BB. Find out which successor will unconditionally be branched to.
+ BasicBlock *TheRealDest = 0;
+ for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+ if (ThisCases[i].first == TIV) {
+ TheRealDest = ThisCases[i].second;
+ break;
+ }
- // Remove PHI node entries for dead edges.
- BasicBlock *CheckEdge = TheRealDest;
- for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
- if (*SI != CheckEdge)
- (*SI)->removePredecessor(TIBB);
- else
- CheckEdge = 0;
+ // If not handled by any explicit cases, it is handled by the default case.
+ if (TheRealDest == 0) TheRealDest = ThisDef;
- // Insert the new branch.
- Instruction *NI = BranchInst::Create(TheRealDest, TI);
- (void) NI;
+ // Remove PHI node entries for dead edges.
+ BasicBlock *CheckEdge = TheRealDest;
+ for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+ if (*SI != CheckEdge)
+ (*SI)->removePredecessor(TIBB);
+ else
+ CheckEdge = 0;
- DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
- << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+ // Insert the new branch.
+ Instruction *NI = BranchInst::Create(TheRealDest, TI);
+ (void) NI;
- EraseTerminatorInstAndDCECond(TI);
- return true;
- }
- return false;
+ DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
+ << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");
+
+ EraseTerminatorInstAndDCECond(TI);
+ return true;
}
namespace {
@@ -603,6 +622,16 @@ namespace {
};
}
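+
+/// ConstantIntSortPredicate - Comparator for array_pod_sort that orders
+/// ConstantInts by their APInt value so that duplicate case values end up
+/// adjacent and can be removed with std::unique.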
+static int ConstantIntSortPredicate(const void *P1, const void *P2) {
+ const ConstantInt *LHS = *(const ConstantInt**)P1;
+ const ConstantInt *RHS = *(const ConstantInt**)P2;
+ if (LHS->getValue().ult(RHS->getValue()))
+ return 1;
+ if (LHS->getValue() == RHS->getValue())
+ return 0;
+ return -1;
+}
+
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
/// equality comparison instruction (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
@@ -798,7 +827,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->intersectOptionalDataWith(I2);
- BB2->getInstList().erase(I2);
+ I2->eraseFromParent();
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I1))
@@ -836,18 +865,18 @@ HoistTerminator:
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
Value *BB1V = PN->getIncomingValueForBlock(BB1);
Value *BB2V = PN->getIncomingValueForBlock(BB2);
- if (BB1V != BB2V) {
- // These values do not agree. Insert a select instruction before NT
- // that determines the right value.
- SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
- SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
- BB1V->getName()+"."+BB2V->getName(), NT);
- // Make the PHI node use the select for all incoming values for BB1/BB2
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
- PN->setIncomingValue(i, SI);
- }
+ if (BB1V == BB2V) continue;
+
+ // These values do not agree. Insert a select instruction before NT
+ // that determines the right value.
+ SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+ if (SI == 0)
+ SI = SelectInst::Create(BI->getCondition(), BB1V, BB2V,
+ BB1V->getName()+"."+BB2V->getName(), NT);
+ // Make the PHI node use the select for all incoming values for BB1/BB2
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+ PN->setIncomingValue(i, SI);
}
}
@@ -872,21 +901,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
- if (isa<DbgInfoIntrinsic>(I)) continue;
- if (I == Term) break;
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+ if (I == Term) break;
- if (!HInst)
- HInst = I;
- else
+ if (HInst)
return false;
+ HInst = I;
}
if (!HInst)
return false;
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
- if (isa<Instruction>(BrCond) &&
- cast<Instruction>(BrCond)->getOpcode() == Instruction::FCmp)
+ if (isa<FCmpInst>(BrCond))
return false;
// If BB1 is actually on the false edge of the conditional branch, remember
@@ -990,12 +1017,12 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
UI != UE; ++UI) {
Instruction *Use = cast<Instruction>(*UI);
- if (BB1Insns.count(Use)) {
- // If BrCond uses the instruction that place it just before
- // branch instruction.
- InsertPos = BI;
- break;
- }
+ if (!BB1Insns.count(Use)) continue;
+
+ // If BrCond uses the instruction, then place it just before the
+ // branch instruction.
+ InsertPos = BI;
+ break;
}
} else
InsertPos = BI;
@@ -1016,8 +1043,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
PHINode *PN = PHIUses[i];
for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
- if (PN->getIncomingBlock(j) == BB1 ||
- PN->getIncomingBlock(j) == BIParent)
+ if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
PN->setIncomingValue(j, SI);
}
@@ -1055,7 +1081,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1075,78 +1101,73 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantInt *CB;
- if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
- CB->getType()->isIntegerTy(1)) {
- // Okay, we now know that all edges from PredBB should be revectored to
- // branch to RealDest.
- BasicBlock *PredBB = PN->getIncomingBlock(i);
- BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+ ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
+ if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+
+ // Okay, we now know that all edges from PredBB should be revectored to
+ // branch to RealDest.
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+
+ if (RealDest == BB) continue; // Skip self loops.
+
+ // The dest block might have PHI nodes, other predecessors and other
+ // difficult cases. Instead of being smart about this, just insert a new
+ // block that jumps to the destination block, effectively splitting
+ // the edge we are about to create.
+ BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
+ RealDest->getName()+".critedge",
+ RealDest->getParent(), RealDest);
+ BranchInst::Create(RealDest, EdgeBB);
+
+ // Update PHI nodes.
+ AddPredecessorToBlock(RealDest, EdgeBB, BB);
+
+ // BB may have instructions that are being threaded over. Clone these
+ // instructions into EdgeBB. We know that there will be no uses of the
+ // cloned instructions outside of EdgeBB.
+ BasicBlock::iterator InsertPt = EdgeBB->begin();
+ DenseMap<Value*, Value*> TranslateMap; // Track translated values.
+ for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ continue;
+ }
+ // Clone the instruction.
+ Instruction *N = BBI->clone();
+ if (BBI->hasName()) N->setName(BBI->getName()+".c");
- if (RealDest == BB) continue; // Skip self loops.
+ // Update operands due to translation.
+ for (User::op_iterator i = N->op_begin(), e = N->op_end();
+ i != e; ++i) {
+ DenseMap<Value*, Value*>::iterator PI = TranslateMap.find(*i);
+ if (PI != TranslateMap.end())
+ *i = PI->second;
+ }
- // The dest block might have PHI nodes, other predecessors and other
- // difficult cases. Instead of being smart about this, just insert a new
- // block that jumps to the destination block, effectively splitting
- // the edge we are about to create.
- BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(),
- RealDest->getName()+".critedge",
- RealDest->getParent(), RealDest);
- BranchInst::Create(RealDest, EdgeBB);
- PHINode *PN;
- for (BasicBlock::iterator BBI = RealDest->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- Value *V = PN->getIncomingValueForBlock(BB);
- PN->addIncoming(V, EdgeBB);
+ // Check for trivial simplification.
+ if (Value *V = SimplifyInstruction(N, TD)) {
+ TranslateMap[BBI] = V;
+ delete N; // Instruction folded away, don't need actual inst
+ } else {
+ // Insert the new instruction into its new home.
+ EdgeBB->getInstList().insert(InsertPt, N);
+ if (!BBI->use_empty())
+ TranslateMap[BBI] = N;
}
+ }
- // BB may have instructions that are being threaded over. Clone these
- // instructions into EdgeBB. We know that there will be no uses of the
- // cloned instructions outside of EdgeBB.
- BasicBlock::iterator InsertPt = EdgeBB->begin();
- std::map<Value*, Value*> TranslateMap; // Track translated values.
- for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
- } else {
- // Clone the instruction.
- Instruction *N = BBI->clone();
- if (BBI->hasName()) N->setName(BBI->getName()+".c");
-
- // Update operands due to translation.
- for (User::op_iterator i = N->op_begin(), e = N->op_end();
- i != e; ++i) {
- std::map<Value*, Value*>::iterator PI =
- TranslateMap.find(*i);
- if (PI != TranslateMap.end())
- *i = PI->second;
- }
-
- // Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N)) {
- TranslateMap[BBI] = C;
- delete N; // Constant folded away, don't need actual inst
- } else {
- // Insert the new instruction into its new home.
- EdgeBB->getInstList().insert(InsertPt, N);
- if (!BBI->use_empty())
- TranslateMap[BBI] = N;
- }
- }
+ // Loop over all of the edges from PredBB to BB, changing them to branch
+ // to EdgeBB instead.
+ TerminatorInst *PredBBTI = PredBB->getTerminator();
+ for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+ if (PredBBTI->getSuccessor(i) == BB) {
+ BB->removePredecessor(PredBB);
+ PredBBTI->setSuccessor(i, EdgeBB);
}
-
- // Loop over all of the edges from PredBB to BB, changing them to branch
- // to EdgeBB instead.
- TerminatorInst *PredBBTI = PredBB->getTerminator();
- for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
- if (PredBBTI->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
- PredBBTI->setSuccessor(i, EdgeBB);
- }
-
- // Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI) | true;
- }
+
+ // Recurse, simplifying any other constants.
+ return FoldCondBranchOnPHI(BI, TD) | true;
}
return false;
@@ -1154,18 +1175,20 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
// subsequently causes this merge to happen. We really want control
// dependence information for this check, but simplifycfg can't keep it up
// to date, and this catches most of the cases we care about anyway.
- //
BasicBlock *BB = PN->getParent();
BasicBlock *IfTrue, *IfFalse;
Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
- if (!IfCond) return false;
+ if (!IfCond ||
+ // Don't bother if the branch will be constant folded trivially.
+ isa<ConstantInt>(IfCond))
+ return false;
// Okay, we found that we can merge this two-entry phi node into a select.
// Doing so would require us to fold *all* two entry phi nodes in this block.
@@ -1177,42 +1200,49 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
if (NumPhis > 2)
return false;
- DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
- << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
-
// Loop over the PHI's seeing if we can promote them all to select
// instructions. While we are at it, keep track of the instructions
// that need to be moved to the dominating block.
- std::set<Instruction*> AggressiveInsts;
-
- BasicBlock::iterator AfterPHIIt = BB->begin();
- while (isa<PHINode>(AfterPHIIt)) {
- PHINode *PN = cast<PHINode>(AfterPHIIt++);
- if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
- if (PN->getIncomingValue(0) != PN)
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- else
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
- &AggressiveInsts) ||
- !DominatesMergePoint(PN->getIncomingValue(1), BB,
- &AggressiveInsts)) {
- return false;
+ SmallPtrSet<Instruction*, 4> AggressiveInsts;
+
+ for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
+ PHINode *PN = cast<PHINode>(II++);
+ if (Value *V = SimplifyInstruction(PN, TD)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ continue;
}
+
+ if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) ||
+ !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts))
+ return false;
}
+ // If we folded the first phi, PN dangles at this point. Refresh it. If
+ // we ran out of PHIs then we simplified them all.
+ PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) return true;
+
+ // Don't fold i1 branches on PHIs which contain binary operators. These can
+ // often be turned into switches and other things.
+ if (PN->getType()->isIntegerTy(1) &&
+ (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
+ isa<BinaryOperator>(PN->getIncomingValue(1)) ||
+ isa<BinaryOperator>(IfCond)))
+ return false;
+
// If all PHI nodes are promotable, check to make sure that all
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
// worth promoting to select instructions.
- BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
- PN = cast<PHINode>(BB->begin());
- BasicBlock *Pred = PN->getIncomingBlock(0);
- if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
- IfBlock1 = Pred;
- DomBlock = *pred_begin(Pred);
- for (BasicBlock::iterator I = Pred->begin();
- !isa<TerminatorInst>(I); ++I)
+ BasicBlock *DomBlock = 0;
+ BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
+ BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
+ if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
+ IfBlock1 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock1);
+ for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
@@ -1221,12 +1251,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
}
}
- Pred = PN->getIncomingBlock(1);
- if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
- IfBlock2 = Pred;
- DomBlock = *pred_begin(Pred);
- for (BasicBlock::iterator I = Pred->begin();
- !isa<TerminatorInst>(I); ++I)
+ if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
+ IfBlock2 = 0;
+ } else {
+ DomBlock = *pred_begin(IfBlock2);
+ for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
if (!AggressiveInsts.count(I) && !isa<DbgInfoIntrinsic>(I)) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control
@@ -1234,56 +1263,45 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {
return false;
}
}
+
+ DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
+ << IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
// If we can still promote the PHI nodes after this gauntlet of tests,
// do all of the PHI's now.
-
+ Instruction *InsertPt = DomBlock->getTerminator();
+
// Move all 'aggressive' instructions, which are defined in the
// conditional parts of the if's up to the dominating block.
- if (IfBlock1) {
- DomBlock->getInstList().splice(DomBlock->getTerminator(),
- IfBlock1->getInstList(),
- IfBlock1->begin(),
+ if (IfBlock1)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock1->getInstList(), IfBlock1->begin(),
IfBlock1->getTerminator());
- }
- if (IfBlock2) {
- DomBlock->getInstList().splice(DomBlock->getTerminator(),
- IfBlock2->getInstList(),
- IfBlock2->begin(),
+ if (IfBlock2)
+ DomBlock->getInstList().splice(InsertPt,
+ IfBlock2->getInstList(), IfBlock2->begin(),
IfBlock2->getTerminator());
- }
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
// Change the PHI node into a select instruction.
- Value *TrueVal =
- PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
- Value *FalseVal =
- PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+ Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+ Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
- Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+ Value *NV = SelectInst::Create(IfCond, TrueVal, FalseVal, "", InsertPt);
PN->replaceAllUsesWith(NV);
NV->takeName(PN);
-
- BB->getInstList().erase(PN);
+ PN->eraseFromParent();
}
+
+ // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
+ // has been flattened. Change DomBlock to jump directly to our new block to
+ // avoid other simplifycfg's kicking in on the diamond.
+ TerminatorInst *OldTI = DomBlock->getTerminator();
+ BranchInst::Create(BB, OldTI);
+ OldTI->eraseFromParent();
return true;
}
-/// isTerminatorFirstRelevantInsn - Return true if Term is very first
-/// instruction ignoring Phi nodes and dbg intrinsics.
-static bool isTerminatorFirstRelevantInsn(BasicBlock *BB, Instruction *Term) {
- BasicBlock::iterator BBI = Term;
- while (BBI != BB->begin()) {
- --BBI;
- if (!isa<DbgInfoIntrinsic>(BBI))
- break;
- }
-
- if (isa<PHINode>(BBI) || &*BBI == Term || isa<DbgInfoIntrinsic>(BBI))
- return true;
- return false;
-}
-
/// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes
/// to two returning blocks, try to merge them together into one return,
/// introducing a select if the return values disagree.
@@ -1297,9 +1315,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
// Check to ensure both blocks are empty (just a return) or optionally empty
// with PHI nodes. If there are other instructions, merging would cause extra
// computation on one path or the other.
- if (!isTerminatorFirstRelevantInsn(TrueSucc, TrueRet))
+ if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
return false;
- if (!isTerminatorFirstRelevantInsn(FalseSucc, FalseRet))
+ if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
return false;
// Okay, we found a branch that is going to two return nodes. If
@@ -1386,7 +1404,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// must be at the front of the block.
BasicBlock::iterator FrontIt = BB->front();
// Ignore dbg intrinsics.
- while(isa<DbgInfoIntrinsic>(FrontIt))
+ while (isa<DbgInfoIntrinsic>(FrontIt))
++FrontIt;
// Allow a single instruction to be hoisted in addition to the compare
@@ -1470,7 +1488,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
UsedValues.erase(Pair.first);
if (UsedValues.empty()) break;
- if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+ if (Instruction *I = dyn_cast<Instruction>(Pair.first)) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
@@ -1498,9 +1516,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
- Value *NewCond =
- BinaryOperator::CreateNot(PBI->getCondition(),
+ Value *NewCond = PBI->getCondition();
+
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else {
+ NewCond = BinaryOperator::CreateNot(NewCond,
PBI->getCondition()->getName()+".not", PBI);
+ }
+
PBI->setCondition(NewCond);
BasicBlock *OldTrue = PBI->getSuccessor(0);
BasicBlock *OldFalse = PBI->getSuccessor(1);
@@ -1686,17 +1711,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// OtherDest may have phi nodes. If so, add entries from PBI's
// block that are identical to the entries for BI's block.
- PHINode *PN;
- for (BasicBlock::iterator II = OtherDest->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- Value *V = PN->getIncomingValueForBlock(BB);
- PN->addIncoming(V, PBI->getParent());
- }
+ AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
// We know that the CommonDest already had an edge from PBI to
// it. If it has PHIs though, the PHIs may have different
// entries for BB and PBI's BB. If so, insert a select to make
// them agree.
+ PHINode *PN;
for (BasicBlock::iterator II = CommonDest->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
Value *BIV = PN->getIncomingValueForBlock(BB);
@@ -1718,481 +1739,789 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
return true;
}
-bool SimplifyCFGOpt::run(BasicBlock *BB) {
- bool Changed = false;
- Function *M = BB->getParent();
-
- assert(BB && BB->getParent() && "Block not embedded in function!");
- assert(BB->getTerminator() && "Degenerate basic block encountered!");
+// SimplifyTerminatorOnSelect - Simplifies a terminator by replacing it with a
+// branch to TrueBB if Cond is true or to FalseBB if Cond is false.
+// Takes care of updating the successors and removing the old terminator.
+// Also makes sure not to introduce new successors by assuming that edges to
+// non-successor TrueBBs and FalseBBs aren't reachable.
+static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
+ BasicBlock *TrueBB, BasicBlock *FalseBB){
+ // Remove any superfluous successor edges from the CFG.
+ // First, figure out which successors to preserve.
+ // If TrueBB and FalseBB are equal, only try to preserve one copy of that
+ // successor.
+ BasicBlock *KeepEdge1 = TrueBB;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+
+ // Then remove the rest.
+ for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = OldTerm->getSuccessor(I);
+ // Make sure only to keep exactly one copy of each edge.
+ if (Succ == KeepEdge1)
+ KeepEdge1 = 0;
+ else if (Succ == KeepEdge2)
+ KeepEdge2 = 0;
+ else
+ Succ->removePredecessor(OldTerm->getParent());
+ }
- // Remove basic blocks that have no predecessors (except the entry block)...
- // or that just have themself as a predecessor. These are unreachable.
- if ((pred_begin(BB) == pred_end(BB) &&
- &BB->getParent()->getEntryBlock() != BB) ||
- BB->getSinglePredecessor() == BB) {
- DEBUG(dbgs() << "Removing BB: \n" << *BB);
- DeleteDeadBlock(BB);
- return true;
+ // Insert an appropriate new terminator.
+ if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (TrueBB == FalseBB)
+ // We were only looking for one successor, and it was present.
+ // Create an unconditional branch to it.
+ BranchInst::Create(TrueBB, OldTerm);
+ else
+ // We found both of the successors we were looking for.
+ // Create a conditional branch sharing the condition of the select.
+ BranchInst::Create(TrueBB, FalseBB, Cond, OldTerm);
+ } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
+ // Neither of the selected blocks was a successor, so this
+ // terminator must be unreachable.
+ new UnreachableInst(OldTerm->getContext(), OldTerm);
+ } else {
+ // One of the selected values was a successor, but the other wasn't.
+ // Insert an unconditional branch to the one that was found;
+ // the edge to the one that wasn't must be unreachable.
+ if (KeepEdge1 == 0)
+ // Only TrueBB was found.
+ BranchInst::Create(TrueBB, OldTerm);
+ else
+ // Only FalseBB was found.
+ BranchInst::Create(FalseBB, OldTerm);
}
- // Check to see if we can constant propagate this terminator instruction
- // away...
- Changed |= ConstantFoldTerminator(BB);
+ EraseTerminatorInstAndDCECond(OldTerm);
+ return true;
+}
- // Check for and eliminate duplicate PHI nodes in this block.
- Changed |= EliminateDuplicatePHINodes(BB);
+// SimplifyIndirectBrOnSelect - Replaces
+// (indirectbr (select cond, blockaddress(@fn, BlockA),
+// blockaddress(@fn, BlockB)))
+// with
+// (br cond, BlockA, BlockB).
+static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
+ // Check that both operands of the select are block addresses.
+ BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
+ BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
+ if (!TBA || !FBA)
+ return false;
- // If there is a trivial two-entry PHI node in this basic block, and we can
- // eliminate it, do so now.
- if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
- if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN);
+ // Extract the actual blocks.
+ BasicBlock *TrueBB = TBA->getBasicBlock();
+ BasicBlock *FalseBB = FBA->getBasicBlock();
- // If this is a returning block with only PHI nodes in it, fold the return
- // instruction into any unconditional branch predecessors.
- //
- // If any predecessor is a conditional branch that just selects among
- // different return values, fold the replace the branch/return with a select
- // and return.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- if (isTerminatorFirstRelevantInsn(BB, BB->getTerminator())) {
- // Find predecessors that end with branches.
- SmallVector<BasicBlock*, 8> UncondBranchPreds;
- SmallVector<BranchInst*, 8> CondBranchPreds;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- TerminatorInst *PTI = P->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
- if (BI->isUnconditional())
- UncondBranchPreds.push_back(P);
- else
- CondBranchPreds.push_back(BI);
- }
- }
+ // Perform the actual simplification.
+ return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB);
+}
- // If we found some, do the transformation!
- if (!UncondBranchPreds.empty()) {
- while (!UncondBranchPreds.empty()) {
- BasicBlock *Pred = UncondBranchPreds.pop_back_val();
- DEBUG(dbgs() << "FOLDING: " << *BB
- << "INTO UNCOND BRANCH PRED: " << *Pred);
- Instruction *UncondBranch = Pred->getTerminator();
- // Clone the return and add it to the end of the predecessor.
- Instruction *NewRet = RI->clone();
- Pred->getInstList().push_back(NewRet);
-
- // If the return instruction returns a value, and if the value was a
- // PHI node in "BB", propagate the right value into the return.
- for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i)
- if (PHINode *PN = dyn_cast<PHINode>(*i))
- if (PN->getParent() == BB)
- *i = PN->getIncomingValueForBlock(Pred);
-
- // Update any PHI nodes in the returning block to realize that we no
- // longer branch to them.
- BB->removePredecessor(Pred);
- Pred->getInstList().erase(UncondBranch);
- }
+/// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp
+/// instruction (a seteq/setne with a constant) as the only instruction in a
+/// block that ends with an uncond branch. We are looking for a very specific
+/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
+/// this case, we merge the first two "or's of icmp" into a switch, but then the
+/// default value goes to an uncond block with a seteq in it, and we get something
+/// like:
+///
+/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
+/// DEFAULT:
+/// %tmp = icmp eq i8 %A, 92
+/// br label %end
+/// end:
+/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
+///
+/// We prefer to split the edge to 'end' so that there is a true/false entry to
+/// the PHI, merging the third icmp into the switch.
+static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
+ const TargetData *TD) {
+ BasicBlock *BB = ICI->getParent();
+ // If the block has any PHIs in it or the icmp has multiple uses, it is too
+ // complex.
+ if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse()) return false;
+
+ Value *V = ICI->getOperand(0);
+ ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
+
+ // The pattern we're looking for is where our only predecessor is a switch on
+ // 'V' and this block is the default case for the switch. In this case we can
+ // fold the compared value into the switch to simplify things.
+ BasicBlock *Pred = BB->getSinglePredecessor();
+ if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+
+ SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
+ if (SI->getCondition() != V)
+ return false;
+
+ // If BB is reachable on a non-default case, then we simply know the value of
+ // V in this block. Substitute it and constant fold the icmp instruction
+ // away.
+ if (SI->getDefaultDest() != BB) {
+ ConstantInt *VVal = SI->findCaseDest(BB);
+ assert(VVal && "Should have a unique destination value");
+ ICI->setOperand(0, VVal);
+
+ if (Value *V = SimplifyInstruction(ICI, TD)) {
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ }
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // Ok, the block is reachable from the default dest. If the constant we're
+ // comparing exists in one of the other edges, then we can constant fold ICI
+ // and zap it.
+ if (SI->findCaseValue(Cst) != 0) {
+ Value *V;
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ V = ConstantInt::getFalse(BB->getContext());
+ else
+ V = ConstantInt::getTrue(BB->getContext());
+
+ ICI->replaceAllUsesWith(V);
+ ICI->eraseFromParent();
+ // BB is now empty, so it is likely to simplify away.
+ return SimplifyCFG(BB) | true;
+ }
+
+ // The use of the icmp has to be in the 'end' block, by the only PHI node in
+ // the block.
+ BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ isa<PHINode>(++BasicBlock::iterator(PHIUse)))
+ return false;
- // If we eliminated all predecessors of the block, delete the block now.
- if (pred_begin(BB) == pred_end(BB))
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
+ // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
+ // true in the PHI.
+ Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
+ Constant *NewCst = ConstantInt::getFalse(BB->getContext());
- return true;
- }
+ if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(DefaultCst, NewCst);
- // Check out all of the conditional branches going to this return
- // instruction. If any of them just select between returns, change the
- // branch itself into a select/return pair.
- while (!CondBranchPreds.empty()) {
- BranchInst *BI = CondBranchPreds.pop_back_val();
-
- // Check to see if the non-BB successor is also a return block.
- if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
- isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
- SimplifyCondBranchToTwoReturns(BI))
- return true;
- }
- }
- } else if (isa<UnwindInst>(BB->begin())) {
- // Check to see if the first instruction in this block is just an unwind.
- // If so, replace any invoke instructions which use this as an exception
- // destination with call instructions.
- //
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.back();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
- if (II->getUnwindDest() == BB) {
- // Insert a new branch instruction before the invoke, because this
- // is now a fall through.
- BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
- Pred->getInstList().remove(II); // Take out of symbol table
-
- // Insert the call now.
- SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
- CallInst *CI = CallInst::Create(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName(), BI);
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call now does instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
+ // Replace ICI (which is used by the PHI for the default value) with true or
+ // false depending on if it is EQ or NE.
+ ICI->replaceAllUsesWith(DefaultCst);
+ ICI->eraseFromParent();
- Preds.pop_back();
- }
+ // Okay, the switch goes to this block on a default value. Add an edge from
+ // the switch to the merge point on the compared value.
+ BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge",
+ BB->getParent(), BB);
+ SI->addCase(Cst, NewBB);
+
+ // NewBB branches to the phi block, add the uncond branch and the phi entry.
+ BranchInst::Create(SuccBlock, NewBB);
+ PHIUse->addIncoming(NewCst, NewBB);
+ return true;
+}
- // If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
- return true;
- }
+/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
+/// Check to see if it is branching on an or/and chain of icmp instructions, and
+/// fold it into a switch instruction if so.
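+///
+/// For example, "br (X == 0 | X == 1 | X == 7), T, F" becomes a switch on X
+/// with cases 0, 1 and 7 branching to T and a default destination of F.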
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD) {
+ Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
+ if (Cond == 0) return false;
+
+ // Change br (X == 0 | X == 1), T, F into a switch instruction.
+ // If this is a bunch of seteq's or'd together, or if it's a bunch of
+ // 'setne's and'ed together, collect them.
+ Value *CompVal = 0;
+ std::vector<ConstantInt*> Values;
+ bool TrueWhenEqual = true;
+ Value *ExtraCase = 0;
+ unsigned UsedICmps = 0;
+
+ if (Cond->getOpcode() == Instruction::Or) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ UsedICmps);
+ } else if (Cond->getOpcode() == Instruction::And) {
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ UsedICmps);
+ TrueWhenEqual = false;
+ }
+
+ // If we didn't find a value that is compared against multiple constants, fail.
+ if (CompVal == 0) return false;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isValueEqualityComparison(SI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
- return SimplifyCFG(BB) || 1;
-
- // If the block only contains the switch, see if we can fold the block
- // away into any preds.
- BasicBlock::iterator BBI = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (SI == &*BBI)
- if (FoldValueComparisonIntoPredecessors(SI))
- return SimplifyCFG(BB) || 1;
- }
- } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- if (BI->isUnconditional()) {
- BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Avoid turning single icmps into a switch.
+ if (UsedICmps <= 1)
+ return false;
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(BBI))
- ++BBI;
- if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
- if (BB != &BB->getParent()->getEntryBlock())
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
+ // There might be duplicate constants in the list, which the switch
+ // instruction can't handle, so remove them now.
+ array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
+ Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+ // If Extra was used, we require at least two switch values to do the
+ // transformation. A switch with one value is just a cond branch.
+ if (ExtraCase && Values.size() < 2) return false;
+
+ // Figure out which block is which destination.
+ BasicBlock *DefaultBB = BI->getSuccessor(1);
+ BasicBlock *EdgeBB = BI->getSuccessor(0);
+ if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+ BasicBlock *BB = BI->getParent();
+
+ DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
+ << " cases into SWITCH. BB is:\n" << *BB);
+
+ // If there are any extra values that couldn't be folded into the switch
+ // then we evaluate them with an explicit branch first. Split the block
+ // right before the condbr to handle it.
+ if (ExtraCase) {
+ BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test");
+ // Remove the uncond branch added to the old block.
+ TerminatorInst *OldTI = BB->getTerminator();
+
+ if (TrueWhenEqual)
+ BranchInst::Create(EdgeBB, NewBB, ExtraCase, OldTI);
+ else
+ BranchInst::Create(NewBB, EdgeBB, ExtraCase, OldTI);
- } else { // Conditional branch
- if (isValueEqualityComparison(BI)) {
- // If we only have one predecessor, and if it is a branch on this value,
- // see if that predecessor totally determines the outcome of this
- // switch.
- if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
- if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
- return SimplifyCFG(BB) | true;
-
- // This block must be empty, except for the setcond inst, if it exists.
- // Ignore dbg intrinsics.
- BasicBlock::iterator I = BB->begin();
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
- if (&*I == BI) {
- if (FoldValueComparisonIntoPredecessors(BI))
- return SimplifyCFG(BB) | true;
- } else if (&*I == cast<Instruction>(BI->getCondition())){
- ++I;
- // Ignore dbg intrinsics.
- while (isa<DbgInfoIntrinsic>(I))
- ++I;
- if(&*I == BI) {
- if (FoldValueComparisonIntoPredecessors(BI))
- return SimplifyCFG(BB) | true;
- }
- }
- }
+ OldTI->eraseFromParent();
+
+ // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
+ // for the edge we just added.
+ AddPredecessorToBlock(EdgeBB, BB, NewBB);
+
+ DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
+ << "\nEXTRABB = " << *BB);
+ BB = NewBB;
+ }
+
+ // Convert pointer to int before we switch.
+ if (CompVal->getType()->isPointerTy()) {
+ assert(TD && "Cannot switch on pointer without TargetData");
+ CompVal = new PtrToIntInst(CompVal,
+ TD->getIntPtrType(CompVal->getContext()),
+ "magicptr", BI);
+ }
+
+ // Create the new switch instruction now.
+ SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI);
+
+ // Add all of the 'cases' to the switch instruction.
+ for (unsigned i = 0, e = Values.size(); i != e; ++i)
+ New->addCase(Values[i], EdgeBB);
+
+ // We added edges from BB to EdgeBB. As such, if there were any
+ // PHI nodes in EdgeBB, they need entries to be added corresponding to
+ // the number of edges added.
+ for (BasicBlock::iterator BBI = EdgeBB->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ PHINode *PN = cast<PHINode>(BBI);
+ Value *InVal = PN->getIncomingValueForBlock(BB);
+ for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+ PN->addIncoming(InVal, BB);
+ }
+
+ // Erase the old branch instruction.
+ EraseTerminatorInstAndDCECond(BI);
+
+ DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
+ return true;
+}
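
The rewrite above is easiest to see at the source level. A minimal C++ sketch of the before/after shape, with purely illustrative names (edge_block, default_block, and the extra non-constant test are hypothetical, not taken from the patch):

    int edge_block();
    int default_block();

    // Before: a chain of equality compares or'd together, plus one compare
    // that is not against a constant (the "ExtraCase").
    int before(int X, bool Extra) {
      if (X == 1 || X == 4 || X == 9 || Extra)
        return edge_block();
      return default_block();
    }

    // After: the non-constant test is split into an early branch
    // ("switch.early.test"), and the constants become switch cases that all
    // jump to the same edge block.
    int after(int X, bool Extra) {
      if (Extra)
        return edge_block();
      switch (X) {
      case 1: case 4: case 9:
        return edge_block();
      default:
        return default_block();
      }
    }
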
- // If this is a branch on a phi node in the current block, thread control
- // through this block if any PHI node entries are constants.
- if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
- if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI))
- return SimplifyCFG(BB) | true;
-
- // If this basic block is ONLY a setcc and a branch, and if a predecessor
- // branches to us and one of our successors, fold the setcc into the
- // predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
+bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
+ BasicBlock *BB = RI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
+
+ // Find predecessors that end with branches.
+ SmallVector<BasicBlock*, 8> UncondBranchPreds;
+ SmallVector<BranchInst*, 8> CondBranchPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
+ if (BI->isUnconditional())
+ UncondBranchPreds.push_back(P);
+ else
+ CondBranchPreds.push_back(BI);
+ }
+ }
+
+ // If we found some, do the transformation!
+ if (!UncondBranchPreds.empty() && DupRet) {
+ while (!UncondBranchPreds.empty()) {
+ BasicBlock *Pred = UncondBranchPreds.pop_back_val();
+ DEBUG(dbgs() << "FOLDING: " << *BB
+ << "INTO UNCOND BRANCH PRED: " << *Pred);
+ (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (pred_begin(BB) == pred_end(BB))
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+
+ return true;
+ }
+
+ // Check out all of the conditional branches going to this return
+ // instruction. If any of them just select between returns, change the
+ // branch itself into a select/return pair.
+ while (!CondBranchPreds.empty()) {
+ BranchInst *BI = CondBranchPreds.pop_back_val();
+
+ // Check to see if the non-BB successor is also a return block.
+ if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
+ isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
+ SimplifyCondBranchToTwoReturns(BI))
+ return true;
+ }
+ return false;
+}
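
For the conditional-branch case handled at the end of SimplifyReturn, the effect of SimplifyCondBranchToTwoReturns corresponds roughly to the following C++ sketch (names and values are illustrative only):

    int before(bool C, int A, int B) {
      if (C)
        return A;   // successor #0: a return block
      return B;     // successor #1: another return block
    }

    int after(bool C, int A, int B) {
      return C ? A : B;   // branch folded into a select feeding one return
    }
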
+bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI) {
+ // Check to see if the first instruction in this block is just an unwind.
+ // If so, replace any invoke instructions which use this as an exception
+ // destination with call instructions.
+ BasicBlock *BB = UI->getParent();
+ if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
- // Scan predecessor blocks for conditional branches.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI != BI && PBI->isConditional())
- if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB) | true;
- }
- } else if (isa<UnreachableInst>(BB->getTerminator())) {
- // If there are any instructions immediately before the unreachable that can
- // be removed, do so.
- Instruction *Unreachable = BB->getTerminator();
- while (Unreachable != BB->begin()) {
- BasicBlock::iterator BBI = Unreachable;
- --BBI;
- // Do not delete instructions that can have side effects, like calls
- // (which may never return) and volatile loads and stores.
- if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
-
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
- if (SI->isVolatile())
- break;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
- if (LI->isVolatile())
- break;
-
- // Delete this instruction
- BB->getInstList().erase(BBI);
+ bool Changed = false;
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ while (!Preds.empty()) {
+ BasicBlock *Pred = Preds.back();
+ InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
+ if (II && II->getUnwindDest() == BB) {
+ // Insert a new branch instruction before the invoke, because this
+ // is now a fall through.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ Pred->getInstList().remove(II); // Take out of symbol table
+
+ // Insert the call now.
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call now does instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
Changed = true;
}
+
+ Preds.pop_back();
+ }
+
+ // If this block is now dead (and isn't the entry block), remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
+
+ return Changed;
+}
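
SimplifyUnwind copies the predecessor list into a SmallVector before rewriting any invokes, because removing an edge while walking pred_begin()/pred_end() would invalidate the iteration. A self-contained C++ sketch of that pattern, using simplified stand-in types rather than the real LLVM classes:

    #include <vector>

    struct Block { std::vector<Block*> preds; };

    void rewrite_preds(Block &BB) {
      std::vector<Block*> Preds(BB.preds);   // snapshot the predecessor list
      while (!Preds.empty()) {
        Block *P = Preds.back();
        Preds.pop_back();
        (void)P;  // ... rewrite P's terminator here; BB.preds may change ...
      }
    }
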
- // If the unreachable instruction is the first in the block, take a gander
- // at all of the predecessors of this instruction, and simplify them.
- if (&BB->front() == Unreachable) {
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- TerminatorInst *TI = Preds[i]->getTerminator();
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isUnconditional()) {
- if (BI->getSuccessor(0) == BB) {
- new UnreachableInst(TI->getContext(), TI);
- TI->eraseFromParent();
- Changed = true;
- }
- } else {
- if (BI->getSuccessor(0) == BB) {
- BranchInst::Create(BI->getSuccessor(1), BI);
- EraseTerminatorInstAndDCECond(BI);
- } else if (BI->getSuccessor(1) == BB) {
- BranchInst::Create(BI->getSuccessor(0), BI);
- EraseTerminatorInstAndDCECond(BI);
- Changed = true;
- }
+bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
+ BasicBlock *BB = UI->getParent();
+
+ bool Changed = false;
+
+ // If there are any instructions immediately before the unreachable that can
+ // be removed, do so.
+ while (UI != BB->begin()) {
+ BasicBlock::iterator BBI = UI;
+ --BBI;
+ // Do not delete instructions that can have side effects, like calls
+ // (which may never return) and volatile loads and stores.
+ if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)) break;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ if (SI->isVolatile())
+ break;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ if (LI->isVolatile())
+ break;
+
+ // Delete this instruction
+ BBI->eraseFromParent();
+ Changed = true;
+ }
+
+ // If the unreachable instruction is the first in the block, take a gander
+ // at all of the predecessors of this instruction, and simplify them.
+ if (&BB->front() != UI) return Changed;
+
+ SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ TerminatorInst *TI = Preds[i]->getTerminator();
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isUnconditional()) {
+ if (BI->getSuccessor(0) == BB) {
+ new UnreachableInst(TI->getContext(), TI);
+ TI->eraseFromParent();
+ Changed = true;
+ }
+ } else {
+ if (BI->getSuccessor(0) == BB) {
+ BranchInst::Create(BI->getSuccessor(1), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ } else if (BI->getSuccessor(1) == BB) {
+ BranchInst::Create(BI->getSuccessor(0), BI);
+ EraseTerminatorInstAndDCECond(BI);
+ Changed = true;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ if (SI->getSuccessor(i) == BB) {
+ BB->removePredecessor(SI->getParent());
+ SI->removeCase(i);
+ --i; --e;
+ Changed = true;
+ }
+ // If the default value is unreachable, figure out the most popular
+ // destination and make it the default.
+ if (SI->getSuccessor(0) == BB) {
+ std::map<BasicBlock*, unsigned> Popularity;
+ for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+ Popularity[SI->getSuccessor(i)]++;
+
+ // Find the most popular block.
+ unsigned MaxPop = 0;
+ BasicBlock *MaxBlock = 0;
+ for (std::map<BasicBlock*, unsigned>::iterator
+ I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+ if (I->second > MaxPop) {
+ MaxPop = I->second;
+ MaxBlock = I->first;
}
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ }
+ if (MaxBlock) {
+ // Make this the new default, allowing us to delete any explicit
+ // edges to it.
+ SI->setSuccessor(0, MaxBlock);
+ Changed = true;
+
+ // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+ // it.
+ if (isa<PHINode>(MaxBlock->begin()))
+ for (unsigned i = 0; i != MaxPop-1; ++i)
+ MaxBlock->removePredecessor(SI->getParent());
+
for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == BB) {
- BB->removePredecessor(SI->getParent());
+ if (SI->getSuccessor(i) == MaxBlock) {
SI->removeCase(i);
--i; --e;
- Changed = true;
- }
- // If the default value is unreachable, figure out the most popular
- // destination and make it the default.
- if (SI->getSuccessor(0) == BB) {
- std::map<BasicBlock*, unsigned> Popularity;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- Popularity[SI->getSuccessor(i)]++;
-
- // Find the most popular block.
- unsigned MaxPop = 0;
- BasicBlock *MaxBlock = 0;
- for (std::map<BasicBlock*, unsigned>::iterator
- I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
- if (I->second > MaxPop) {
- MaxPop = I->second;
- MaxBlock = I->first;
- }
- }
- if (MaxBlock) {
- // Make this the new default, allowing us to delete any explicit
- // edges to it.
- SI->setSuccessor(0, MaxBlock);
- Changed = true;
-
- // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
- // it.
- if (isa<PHINode>(MaxBlock->begin()))
- for (unsigned i = 0; i != MaxPop-1; ++i)
- MaxBlock->removePredecessor(SI->getParent());
-
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == MaxBlock) {
- SI->removeCase(i);
- --i; --e;
- }
}
- }
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
- if (II->getUnwindDest() == BB) {
- // Convert the invoke to a call instruction. This would be a good
- // place to note that the call does not throw though.
- BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
- II->removeFromParent(); // Take out of symbol table
-
- // Insert the call now...
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
- CallInst *CI = CallInst::Create(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName(), BI);
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the call does now instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
}
}
-
- // If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB) &&
- BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- M->getBasicBlockList().erase(BB);
- return true;
- }
- }
- } else if (IndirectBrInst *IBI =
- dyn_cast<IndirectBrInst>(BB->getTerminator())) {
- // Eliminate redundant destinations.
- SmallPtrSet<Value *, 8> Succs;
- for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
- BasicBlock *Dest = IBI->getDestination(i);
- if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
- Dest->removePredecessor(BB);
- IBI->removeDestination(i);
- --i; --e;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+ if (II->getUnwindDest() == BB) {
+ // Convert the invoke to a call instruction. This would be a good
+ // place to note that the call does not throw though.
+ BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);
+ II->removeFromParent(); // Take out of symbol table
+
+ // Insert the call now...
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
+ CallInst *CI = CallInst::Create(II->getCalledValue(),
+ Args.begin(), Args.end(),
+ II->getName(), BI);
+ CI->setCallingConv(II->getCallingConv());
+ CI->setAttributes(II->getAttributes());
+ // If the invoke produced a value, the call does now instead.
+ II->replaceAllUsesWith(CI);
+ delete II;
Changed = true;
}
- }
+ }
+ }
+
+ // If this block is now dead, remove it.
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ // We know there are no successors, so just nuke the block.
+ BB->eraseFromParent();
+ return true;
+ }
- if (IBI->getNumDestinations() == 0) {
- // If the indirectbr has no successors, change it to unreachable.
- new UnreachableInst(IBI->getContext(), IBI);
- IBI->eraseFromParent();
- Changed = true;
- } else if (IBI->getNumDestinations() == 1) {
- // If the indirectbr has one successor, change it to a direct branch.
- BranchInst::Create(IBI->getDestination(0), IBI);
- IBI->eraseFromParent();
+ return Changed;
+}
+
+/// TurnSwitchRangeIntoICmp - Turns a switch whose cases form a contiguous
+/// integer range and share one destination into a sub, an icmp and a branch.
+static bool TurnSwitchRangeIntoICmp(SwitchInst *SI) {
+ assert(SI->getNumCases() > 2 && "Degenerate switch?");
+
+ // Make sure all cases point to the same destination and gather the values.
+ SmallVector<ConstantInt *, 16> Cases;
+ Cases.push_back(SI->getCaseValue(1));
+ for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
+ if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ return false;
+ Cases.push_back(SI->getCaseValue(I));
+ }
+ assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+
+ // Sort the case values, then check if they form a range we can transform.
+ array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
+ for (unsigned I = 1, E = Cases.size(); I != E; ++I) {
+ if (Cases[I-1]->getValue() != Cases[I]->getValue()+1)
+ return false;
+ }
+
+ Constant *Offset = ConstantExpr::getNeg(Cases.back());
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+
+ Value *Sub = SI->getCondition();
+ if (!Offset->isNullValue())
+ Sub = BinaryOperator::CreateAdd(Sub, Offset, Sub->getName()+".off", SI);
+ Value *Cmp = new ICmpInst(SI, ICmpInst::ICMP_ULT, Sub, NumCases, "switch");
+ BranchInst::Create(SI->getSuccessor(1), SI->getDefaultDest(), Cmp, SI);
+
+ // Prune obsolete incoming values off the successor's PHI nodes.
+ for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ isa<PHINode>(BBI); ++BBI) {
+ for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ }
+ SI->eraseFromParent();
+
+ return true;
+}
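
The arithmetic behind TurnSwitchRangeIntoICmp: if the case values form the contiguous range [Lo, Lo+N) and all branch to one block, membership can be tested with a single unsigned compare after subtracting Lo. A small C++ sketch (the values are illustrative):

    // Equivalent to Lo <= X && X < Lo + N when X, Lo, N are unsigned;
    // this is the "add negative offset, then icmp ult" emitted above.
    bool in_range(unsigned X, unsigned Lo, unsigned N) {
      return X - Lo < N;
    }
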
+
+bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) {
+ // If this switch is too complex to want to look at, ignore it.
+ if (!isValueEqualityComparison(SI))
+ return false;
+
+ BasicBlock *BB = SI->getParent();
+
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // If the block only contains the switch, see if we can fold the block
+ // away into any preds.
+ BasicBlock::iterator BBI = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ if (SI == &*BBI)
+ if (FoldValueComparisonIntoPredecessors(SI))
+ return SimplifyCFG(BB) | true;
+
+ // Try to transform the switch into an icmp and a branch.
+ if (TurnSwitchRangeIntoICmp(SI))
+ return SimplifyCFG(BB) | true;
+
+ return false;
+}
+
+bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
+ BasicBlock *BB = IBI->getParent();
+ bool Changed = false;
+
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
Changed = true;
}
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
+ }
+
+ if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ EraseTerminatorInstAndDCECond(IBI);
+ return true;
}
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
+ if (SimplifyIndirectBrOnSelect(IBI, SI))
+ return SimplifyCFG(BB) | true;
+ }
+ return Changed;
+}
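
The duplicate-destination pruning above relies on SmallPtrSet::insert reporting whether the element was already present. A plain-C++ sketch of the same idiom with simplified types (int stands in for the destination block):

    #include <cstddef>
    #include <set>
    #include <vector>

    void dedup_destinations(std::vector<int> &dests) {
      std::set<int> seen;
      for (std::size_t i = 0; i != dests.size(); ) {
        if (!seen.insert(dests[i]).second)
          dests.erase(dests.begin() + i);   // redundant edge, drop it
        else
          ++i;
      }
    }
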
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- //
- if (MergeBlockIntoPredecessor(BB))
+bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ // If the Terminator is the only non-phi instruction, simplify the block.
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
+ TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
+
+ // If the only instruction in the block is a seteq/setne comparison
+ // against a constant, try to simplify the block.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+ if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I)
+ ;
+ if (I->isTerminator() && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD))
+ return true;
+ }
+
+ return false;
+}
- // Otherwise, if this block only has a single predecessor, and if that block
- // is a conditional branch, see if we can hoist any code from this block up
- // into our predecessor.
- pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- BasicBlock *OnlyPred = 0;
- for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
- if (!OnlyPred)
- OnlyPred = *PI;
- else if (*PI != OnlyPred) {
- OnlyPred = 0; // There are multiple different predecessors...
- break;
+
+bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI) {
+ BasicBlock *BB = BI->getParent();
+
+ // Conditional branch
+ if (isValueEqualityComparison(BI)) {
+ // If we only have one predecessor, and if it is a branch on this value,
+ // see if that predecessor totally determines the outcome of this
+ // switch.
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+ if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+ return SimplifyCFG(BB) | true;
+
+ // This block must be empty, except for the setcond inst, if it exists.
+ BasicBlock::iterator I = BB->begin();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI) {
+ if (FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
+ } else if (&*I == cast<Instruction>(BI->getCondition())){
+ ++I;
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(I))
+ ++I;
+ if (&*I == BI && FoldValueComparisonIntoPredecessors(BI))
+ return SimplifyCFG(BB) | true;
}
}
- if (OnlyPred)
- if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
- if (BI->isConditional()) {
- // Get the other block.
- BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
- PI = pred_begin(OtherBB);
- ++PI;
-
- if (PI == pred_end(OtherBB)) {
- // We have a conditional branch to two blocks that are only reachable
- // from the condbr. We know that the condbr dominates the two blocks,
- // so see if there is any identical code in the "then" and "else"
- // blocks. If so, we can hoist it up to the branching block.
- Changed |= HoistThenElseCodeToIf(BI);
- } else {
- BasicBlock* OnlySucc = NULL;
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
- SI != SE; ++SI) {
- if (!OnlySucc)
- OnlySucc = *SI;
- else if (*SI != OnlySucc) {
- OnlySucc = 0; // There are multiple distinct successors!
- break;
- }
- }
+ // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
+ if (SimplifyBranchOnICmpChain(BI, TD))
+ return true;
+
+ // We have a conditional branch to two blocks that are only reachable
+ // from BI. We know that the condbr dominates the two blocks, so see if
+ // there is any identical code in the "then" and "else" blocks. If so, we
+ // can hoist it up to the branching block.
+ if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (HoistThenElseCodeToIf(BI))
+ return SimplifyCFG(BB) | true;
+ } else {
+ // If Successor #1 has multiple preds, we may be able to conditionally
+ // execute Successor #0 if it branches to successor #1.
+ TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
+ if (Succ0TI->getNumSuccessors() == 1 &&
+ Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ return SimplifyCFG(BB) | true;
+ }
+ } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ // If Successor #0 has multiple preds, we may be able to conditionally
+ // execute Successor #1 if it branches to successor #0.
+ TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
+ if (Succ1TI->getNumSuccessors() == 1 &&
+ Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ return SimplifyCFG(BB) | true;
+ }
+
+ // If this is a branch on a phi node in the current block, thread control
+ // through this block if any PHI node entries are constants.
+ if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+ if (PN->getParent() == BI->getParent())
+ if (FoldCondBranchOnPHI(BI, TD))
+ return SimplifyCFG(BB) | true;
+
+ // If this basic block is ONLY a setcc and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the setcc into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
+ // Scan predecessor blocks for conditional branches.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI != BI && PBI->isConditional())
+ if (SimplifyCondBranchToCondBranch(PBI, BI))
+ return SimplifyCFG(BB) | true;
- if (OnlySucc == OtherBB) {
- // If BB's only successor is the other successor of the predecessor,
- // i.e. a triangle, see if we can hoist any code from this block up
- // to the "if" block.
- Changed |= SpeculativelyExecuteBB(BI, BB);
- }
- }
- }
+ return false;
+}
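
SpeculativelyExecuteBB, used in the single-predecessor checks above, handles the "triangle" shape: a side block with one cheap instruction that falls through to the common successor. At the source level the effect is roughly the following (names are illustrative):

    int before(bool C, int A, int B) {
      int R = B;
      if (C)
        R = A + 1;   // side block: one cheap instruction, single predecessor
      return R;
    }

    int after(bool C, int A, int B) {
      int T = A + 1;        // speculated above the branch
      return C ? T : B;     // the PHI becomes a select
    }
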
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- // Change br (X == 0 | X == 1), T, F into a switch instruction.
- if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
- Instruction *Cond = cast<Instruction>(BI->getCondition());
- // If this is a bunch of seteq's or'd together, or if it's a bunch of
- // 'setne's and'ed together, collect them.
- Value *CompVal = 0;
- std::vector<ConstantInt*> Values;
- bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
- if (CompVal) {
- // There might be duplicate constants in the list, which the switch
- // instruction can't handle, remove them now.
- std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
- Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
-
- // Figure out which block is which destination.
- BasicBlock *DefaultBB = BI->getSuccessor(1);
- BasicBlock *EdgeBB = BI->getSuccessor(0);
- if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
-
- // Convert pointer to int before we switch.
- if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without TargetData");
- CompVal = new PtrToIntInst(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
- "magicptr", BI);
- }
+bool SimplifyCFGOpt::run(BasicBlock *BB) {
+ bool Changed = false;
- // Create the new switch instruction now.
- SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB,
- Values.size(), BI);
-
- // Add all of the 'cases' to the switch instruction.
- for (unsigned i = 0, e = Values.size(); i != e; ++i)
- New->addCase(Values[i], EdgeBB);
-
- // We added edges from PI to the EdgeBB. As such, if there were any
- // PHI nodes in EdgeBB, they need entries to be added corresponding to
- // the number of edges added.
- for (BasicBlock::iterator BBI = EdgeBB->begin();
- isa<PHINode>(BBI); ++BBI) {
- PHINode *PN = cast<PHINode>(BBI);
- Value *InVal = PN->getIncomingValueForBlock(*PI);
- for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
- PN->addIncoming(InVal, *PI);
- }
+ assert(BB && BB->getParent() && "Block not embedded in function!");
+ assert(BB->getTerminator() && "Degenerate basic block encountered!");
- // Erase the old branch instruction.
- EraseTerminatorInstAndDCECond(BI);
- return true;
- }
- }
+ // Remove basic blocks that have no predecessors (except the entry block),
+ // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) ||
+ BB->getSinglePredecessor() == BB) {
+ DEBUG(dbgs() << "Removing BB: \n" << *BB);
+ DeleteDeadBlock(BB);
+ return true;
+ }
+
+ // Check to see if we can constant propagate this terminator instruction
+ // away...
+ Changed |= ConstantFoldTerminator(BB);
+
+ // Check for and eliminate duplicate PHI nodes in this block.
+ Changed |= EliminateDuplicatePHINodes(BB);
+
+ // Merge basic blocks into their predecessor if there is only one distinct
+ // pred, and if there is only one distinct successor of the predecessor, and
+ // if there are no PHI nodes.
+ //
+ if (MergeBlockIntoPredecessor(BB))
+ return true;
+
+ // If there is a trivial two-entry PHI node in this basic block, and we can
+ // eliminate it, do so now.
+ if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+ if (PN->getNumIncomingValues() == 2)
+ Changed |= FoldTwoEntryPHINode(PN, TD);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (BI->isUnconditional()) {
+ if (SimplifyUncondBranch(BI)) return true;
+ } else {
+ if (SimplifyCondBranch(BI)) return true;
+ }
+ } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ if (SimplifyReturn(RI)) return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (SimplifySwitch(SI)) return true;
+ } else if (UnreachableInst *UI =
+ dyn_cast<UnreachableInst>(BB->getTerminator())) {
+ if (SimplifyUnreachable(UI)) return true;
+ } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+ if (SimplifyUnwind(UI)) return true;
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ if (SimplifyIndirectBr(IBI)) return true;
+ }
return Changed;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
new file mode 100644
index 0000000..ac005f9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -0,0 +1,94 @@
+//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification. If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+STATISTIC(NumSimplified, "Number of redundant instructions removed");
+
+namespace {
+ struct InstSimplifier : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ InstSimplifier() : FunctionPass(ID) {
+ initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ /// runOnFunction - Remove instructions that simplify.
+ bool runOnFunction(Function &F) {
+ const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ bool Changed = false;
+
+ do {
+ for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+ DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
+ for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ Instruction *I = BI++;
+ // The first time through the loop ToSimplify is empty and we try to
+ // simplify all instructions. On later iterations ToSimplify is not
+ // empty and we only bother simplifying instructions that are in it.
+ if (!ToSimplify->empty() && !ToSimplify->count(I))
+ continue;
+ // Don't waste time simplifying unused instructions.
+ if (!I->use_empty())
+ if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ // Mark all uses for resimplification next time round the loop.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI)
+ Next->insert(cast<Instruction>(*UI));
+ I->replaceAllUsesWith(V);
+ ++NumSimplified;
+ Changed = true;
+ }
+ Changed |= RecursivelyDeleteTriviallyDeadInstructions(I);
+ }
+
+ // Move the set of instructions to be simplified on the next iteration
+ // into ToSimplify.
+ std::swap(ToSimplify, Next);
+ Next->clear();
+ } while (!ToSimplify->empty());
+
+ return Changed;
+ }
+ };
+}
+
+char InstSimplifier::ID = 0;
+INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
+ false, false)
+char &llvm::InstructionSimplifierID = InstSimplifier::ID;
+
+// Public interface to the simplify instructions pass.
+FunctionPass *llvm::createInstructionSimplifierPass() {
+ return new InstSimplifier();
+}
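
runOnFunction above drives the simplification to a fixed point with two pointer-swapped sets: everything is visited on the first pass, and later passes only revisit the users of values that changed. A standalone C++ sketch of that scheme with simplified types (int stands in for Instruction*):

    #include <set>
    #include <utility>

    void simplify_to_fixpoint(const std::set<int> &all) {
      std::set<int> s1, s2, *toSimplify = &s1, *next = &s2;
      do {
        for (std::set<int>::const_iterator it = all.begin();
             it != all.end(); ++it) {
          if (!toSimplify->empty() && !toSimplify->count(*it))
            continue;
          // ... try to simplify *it; if it changed, insert its users into
          // *next so they are revisited on the following round ...
        }
        std::swap(toSimplify, next);   // next round only revisits users
        next->clear();
      } while (!toSimplify->empty());
    }
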
diff --git a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index a51f1e1..ccb8287 100644
--- a/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
- "Unify function exit nodes", false, false);
+ "Unify function exit nodes", false, false)
Pass *llvm::createUnifyFunctionExitNodesPass() {
return new UnifyFunctionExitNodes();
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
new file mode 100644
index 0000000..24e8c8f
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -0,0 +1,37 @@
+//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// TransformUtils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeTransformUtils - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeBreakCriticalEdgesPass(Registry);
+ initializeInstNamerPass(Registry);
+ initializeLCSSAPass(Registry);
+ initializeLoopSimplifyPass(Registry);
+ initializeLowerInvokePass(Registry);
+ initializeLowerSwitchPass(Registry);
+ initializePromotePassPass(Registry);
+ initializeUnifyFunctionExitNodesPass(Registry);
+ initializeInstSimplifierPass(Registry);
+}
+
+/// LLVMInitializeTransformUtils - C binding for initializeTransformUtils.
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
+ initializeTransformUtils(*unwrap(R));
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index fc4bde7..f5481d3 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -21,147 +21,111 @@
using namespace llvm;
Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
- bool ModuleLevelChanges) {
- Value *&VMSlot = VM[V];
- if (VMSlot) return VMSlot; // Does it exist in the map yet?
+ RemapFlags Flags) {
+ ValueToValueMapTy::iterator I = VM.find(V);
+
+ // If the value already exists in the map, use it.
+ if (I != VM.end() && I->second) return I->second;
- // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
- // DenseMap. This includes any recursive calls to MapValue.
-
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
- (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
- !ModuleLevelChanges))
- return VMSlot = const_cast<Value*>(V);
+ if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+ return VM[V] = const_cast<Value*>(V);
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- // Start by assuming that we'll use the identity mapping.
- VMSlot = const_cast<Value*>(V);
-
+ // If this is module-level metadata and we know that nothing at the module
+ // level is changing, then use an identity mapping.
+ if (!MD->isFunctionLocal() && (Flags & RF_NoModuleLevelChanges))
+ return VM[V] = const_cast<Value*>(V);
+
+ // Create a dummy node in case we have a metadata cycle.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+ if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
- // Ok, at least one operand needs remapping.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
- VM[V] = Dummy;
+ // Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
- for (i = 0; i != e; ++i)
- Elts.push_back(MD->getOperand(i) ?
- MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ for (i = 0; i != e; ++i) {
+ Value *Op = MD->getOperand(i);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+ }
MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
Dummy->replaceAllUsesWith(NewMD);
+ VM[V] = NewMD;
MDNode::deleteTemporary(Dummy);
- return VM[V] = NewMD;
+ return NewMD;
}
- // No operands needed remapping; keep the identity map.
+ VM[V] = const_cast<Value*>(V);
+ MDNode::deleteTemporary(Dummy);
+
+ // No operands needed remapping. Use an identity mapping.
return const_cast<Value*>(V);
}
+ // Okay, this must either be a constant (which may or may not be mappable)
+ // or something that is not in the mapping table.
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
if (C == 0)
return 0;
- if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
- isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
- isa<UndefValue>(C))
- return VMSlot = C; // Primitive constants map directly
-
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This array must contain a reference to a global, make a new array
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CA->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantArray::get(CA->getType(), Values);
- }
- }
- return VM[V] = C;
- }
-
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This struct must contain a reference to a global, make a new struct
- // and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CS->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantStruct::get(CS->getType(), Values);
- }
- }
- return VM[V] = C;
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
+ Flags));
+ return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ Value *Op = C->getOperand(i);
+ Value *Mapped = MapValue(Op, VM, Flags);
+ if (Mapped == Op) continue;
+
+ // Okay, the operands don't all match. We've already processed some or all
+ // of the operands; set them up now.
std::vector<Constant*> Ops;
- for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
- return VM[V] = CE->getWithOperands(Ops);
+ Ops.reserve(C->getNumOperands());
+ for (unsigned j = 0; j != i; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+ Ops.push_back(cast<Constant>(Mapped));
+
+ // Map the rest of the operands that aren't processed yet.
+ for (++i; i != e; ++i)
+ Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops);
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(CA->getType(), Ops);
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(CS->getType(), Ops);
+ assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
+ return VM[V] = ConstantVector::get(Ops);
}
-
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
- for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
- i != e; ++i) {
- Value *MV = MapValue(*i, VM, ModuleLevelChanges);
- if (MV != *i) {
- // This vector value must contain a reference to a global, make a new
- // vector constant and return it.
- //
- std::vector<Constant*> Values;
- Values.reserve(CV->getNumOperands());
- for (User::op_iterator j = b; j != i; ++j)
- Values.push_back(cast<Constant>(*j));
- Values.push_back(cast<Constant>(MV));
- for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM,
- ModuleLevelChanges)));
- return VM[V] = ConstantVector::get(Values);
- }
- }
- return VM[V] = C;
- }
-
- BlockAddress *BA = cast<BlockAddress>(C);
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
- ModuleLevelChanges));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
- ModuleLevelChanges));
- return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
+
+ // If we reach here, all of the operands of the constant match.
+ return VM[V] = C;
}
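
The MDNode handling above parks a temporary node in the map before recursing, so that a metadata cycle maps back to the placeholder instead of recursing forever, and then patches it with RAUW once the operands are known. A simplified C++ sketch of the same cycle-breaking idea (here the copy is filled in in place rather than replaced, and ownership is elided for brevity):

    #include <cstddef>
    #include <map>
    #include <vector>

    struct Node { std::vector<Node*> ops; };

    Node *map_node(Node *N, std::map<Node*, Node*> &VM) {
      std::map<Node*, Node*>::iterator I = VM.find(N);
      if (I != VM.end())
        return I->second;                  // already mapped (or in progress)
      Node *Copy = new Node();             // plays the role of the dummy node
      VM[N] = Copy;                        // park it before recursing
      for (std::size_t i = 0; i != N->ops.size(); ++i)
        Copy->ops.push_back(map_node(N->ops[i], VM));
      return Copy;
    }
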
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- bool ModuleLevelChanges) {
+ RemapFlags Flags) {
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, ModuleLevelChanges);
- assert(V && "Referenced value not in value map!");
- *op = V;
+ Value *V = MapValue(*op, VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ *op = V;
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced value not in value map!");
}
// Remap attached metadata.
@@ -170,7 +134,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
Value *Old = MI->second;
- Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ Value *New = MapValue(Old, VMap, Flags);
if (New != Old)
I->setMetadata(MI->first, cast<MDNode>(New));
}
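
A hedged usage sketch for the new flags-based RemapInstruction (not part of the patch): after cloning an instruction its operands still point at the original values, and RF_IgnoreMissingEntries leaves any operand that has no map entry untouched instead of asserting. The cloning context and header paths below reflect this LLVM snapshot and are assumptions:

    #include "llvm/Instruction.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    static Instruction *cloneAndRemap(Instruction *OldI,
                                      ValueToValueMapTy &VMap) {
      Instruction *NewI = OldI->clone();   // operands still reference old values
      // Rewrite whatever has a mapping; silently skip operands that do not.
      RemapInstruction(NewI, VMap, RF_IgnoreMissingEntries);
      return NewI;
    }
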
diff --git a/contrib/llvm/lib/VMCore/AsmWriter.cpp b/contrib/llvm/lib/VMCore/AsmWriter.cpp
index 831a996..cbc874a 100644
--- a/contrib/llvm/lib/VMCore/AsmWriter.cpp
+++ b/contrib/llvm/lib/VMCore/AsmWriter.cpp
@@ -198,6 +198,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
case Type::PPC_FP128TyID: OS << "ppc_fp128"; break;
case Type::LabelTyID: OS << "label"; break;
case Type::MetadataTyID: OS << "metadata"; break;
+ case Type::X86_MMXTyID: OS << "x86_mmx"; break;
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
break;
@@ -830,7 +831,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
Out << " nuw";
if (OBO->hasNoSignedWrap())
Out << " nsw";
- } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(U)) {
+ } else if (const PossiblyExactOperator *Div =
+ dyn_cast<PossiblyExactOperator>(U)) {
if (Div->isExact())
Out << " exact";
} else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
@@ -1057,11 +1059,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
- if (const MDNode *Node = dyn_cast<MDNode>(CV)) {
- Out << "!" << Machine->getMetadataSlot(Node);
- return;
- }
-
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
Out << CE->getOpcodeName();
WriteOptimizationInfo(Out, CE);
@@ -1165,7 +1162,11 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
else
Machine = new SlotTracker(Context);
}
- Out << '!' << Machine->getMetadataSlot(N);
+ int Slot = Machine->getMetadataSlot(N);
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
return;
}
@@ -1395,7 +1396,11 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << "!" << NMD->getName() << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
- Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
+ int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
+ if (Slot == -1)
+ Out << "<badref>";
+ else
+ Out << '!' << Slot;
}
Out << "}\n";
}
@@ -1455,6 +1460,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->isThreadLocal()) Out << "thread_local ";
if (unsigned AddressSpace = GV->getType()->getAddressSpace())
Out << "addrspace(" << AddressSpace << ") ";
+ if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
Out << (GV->isConstant() ? "constant " : "global ");
TypePrinter.print(GV->getType()->getElementType(), Out);
@@ -1575,6 +1581,8 @@ void AssemblyWriter::printFunction(const Function *F) {
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << "ptx_device"; break;
default: Out << "cc" << F->getCallingConv() << " "; break;
}
@@ -1622,6 +1630,8 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "..."; // Output varargs portion of signature!
}
Out << ')';
+ if (F->hasUnnamedAddr())
+ Out << " unnamed_addr";
Attributes FnAttrs = Attrs.getFnAttributes();
if (FnAttrs != Attribute::None)
Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
@@ -1843,6 +1853,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
default: Out << " cc" << CI->getCallingConv(); break;
}
@@ -1897,6 +1909,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
+ case CallingConv::PTX_Kernel: Out << " ptx_kernel"; break;
+ case CallingConv::PTX_Device: Out << " ptx_device"; break;
default: Out << " cc" << II->getCallingConv(); break;
}
@@ -2033,15 +2047,7 @@ static void WriteMDNodeComment(const MDNode *Node,
return;
Out.PadToColumn(50);
- if (Tag == dwarf::DW_TAG_auto_variable)
- Out << "; [ DW_TAG_auto_variable ]";
- else if (Tag == dwarf::DW_TAG_arg_variable)
- Out << "; [ DW_TAG_arg_variable ]";
- else if (Tag == dwarf::DW_TAG_return_variable)
- Out << "; [ DW_TAG_return_variable ]";
- else if (Tag == dwarf::DW_TAG_vector_type)
- Out << "; [ DW_TAG_vector_type ]";
- else if (Tag == dwarf::DW_TAG_user_base)
+ if (Tag == dwarf::DW_TAG_user_base)
Out << "; [ DW_TAG_user_base ]";
else if (Tag.isIntN(32)) {
if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
diff --git a/contrib/llvm/lib/VMCore/Attributes.cpp b/contrib/llvm/lib/VMCore/Attributes.cpp
index a000aee..92152a3 100644
--- a/contrib/llvm/lib/VMCore/Attributes.cpp
+++ b/contrib/llvm/lib/VMCore/Attributes.cpp
@@ -15,8 +15,8 @@
#include "llvm/Type.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
@@ -70,6 +70,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "noimplicitfloat ";
if (Attrs & Attribute::Naked)
Result += "naked ";
+ if (Attrs & Attribute::Hotpatch)
+ Result += "hotpatch ";
if (Attrs & Attribute::StackAlignment) {
Result += "alignstack(";
Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
@@ -105,6 +107,14 @@ Attributes Attribute::typeIncompatible(const Type *Ty) {
//===----------------------------------------------------------------------===//
namespace llvm {
+ class AttributeListImpl;
+}
+
+static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
+
+namespace llvm {
+static ManagedStatic<sys::SmartMutex<true> > ALMutex;
+
class AttributeListImpl : public FoldingSetNode {
sys::cas_flag RefCount;
@@ -120,10 +130,17 @@ public:
RefCount = 0;
}
- void AddRef() { sys::AtomicIncrement(&RefCount); }
+ void AddRef() {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ ++RefCount;
+ }
void DropRef() {
- sys::cas_flag old = sys::AtomicDecrement(&RefCount);
- if (old == 0) delete this;
+ sys::SmartScopedLock<true> Lock(*ALMutex);
+ if (!AttributesLists.isConstructed())
+ return;
+ sys::cas_flag new_val = --RefCount;
+ if (new_val == 0)
+ delete this;
}
void Profile(FoldingSetNodeID &ID) const {
@@ -137,11 +154,8 @@ public:
};
}
-static ManagedStatic<sys::SmartMutex<true> > ALMutex;
-static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists;
-
AttributeListImpl::~AttributeListImpl() {
- sys::SmartScopedLock<true> Lock(*ALMutex);
+ // NOTE: Lock must be acquired by caller.
AttributesLists->RemoveNode(this);
}
@@ -195,6 +209,7 @@ AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {
}
const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+ sys::SmartScopedLock<true> Lock(*ALMutex);
if (AttrList == RHS.AttrList) return *this;
if (AttrList) AttrList->DropRef();
AttrList = RHS.AttrList;
diff --git a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
index 9330e14..b323540 100644
--- a/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/VMCore/AutoUpgrade.cpp
@@ -288,37 +288,224 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
case 'x':
// This fixes all MMX shift intrinsic instructions to take a
- // v1i64 instead of a v2i32 as the second parameter.
- if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
- (Name.compare(13,4,"psll", 4) == 0 ||
- Name.compare(13,4,"psra", 4) == 0 ||
- Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
-
- const llvm::Type *VT =
- VectorType::get(IntegerType::get(FTy->getContext(), 64), 1);
-
- // We don't have to do anything if the parameter already has
- // the correct type.
- if (FTy->getParamType(1) == VT)
+ // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
+ if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
+ const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
+
+ if (Name.compare(13, 4, "padd", 4) == 0 ||
+ Name.compare(13, 4, "psub", 4) == 0 ||
+ Name.compare(13, 4, "pmul", 4) == 0 ||
+ Name.compare(13, 5, "pmadd", 5) == 0 ||
+ Name.compare(13, 4, "pand", 4) == 0 ||
+ Name.compare(13, 3, "por", 3) == 0 ||
+ Name.compare(13, 4, "pxor", 4) == 0 ||
+ Name.compare(13, 4, "pavg", 4) == 0 ||
+ Name.compare(13, 4, "pmax", 4) == 0 ||
+ Name.compare(13, 4, "pmin", 4) == 0 ||
+ Name.compare(13, 4, "psad", 4) == 0 ||
+ Name.compare(13, 4, "psll", 4) == 0 ||
+ Name.compare(13, 4, "psrl", 4) == 0 ||
+ Name.compare(13, 4, "psra", 4) == 0 ||
+ Name.compare(13, 4, "pack", 4) == 0 ||
+ Name.compare(13, 6, "punpck", 6) == 0 ||
+ Name.compare(13, 4, "pcmp", 4) == 0) {
+ assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
+ const Type *SecondParamTy = X86_MMXTy;
+
+ if (Name.compare(13, 5, "pslli", 5) == 0 ||
+ Name.compare(13, 5, "psrli", 5) == 0 ||
+ Name.compare(13, 5, "psrai", 5) == 0)
+ SecondParamTy = FTy->getParamType(1);
+
+ // Don't do anything if it has the correct types.
+ if (FTy->getReturnType() == X86_MMXTy &&
+ FTy->getParamType(0) == X86_MMXTy &&
+ FTy->getParamType(1) == SecondParamTy)
+ break;
+
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
+ // and typed function below.
+ F->setName("");
+
+ // Now construct the new intrinsic with the correct name and type. We
+ // leave the old function around in order to query its type, whatever it
+ // may be, and correctly convert up to the new type.
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy, X86_MMXTy,
+ SecondParamTy, (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 8, "maskmovq", 8) == 0) {
+ // Don't do anything if it has the correct types.
+ if (FTy->getParamType(0) == X86_MMXTy &&
+ FTy->getParamType(1) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ X86_MMXTy,
+ X86_MMXTy,
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
+ if (FTy->getParamType(0) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ X86_MMXTy,
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 5, "movnt", 5) == 0) {
+ if (FTy->getParamType(1) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ FTy->getParamType(0),
+ X86_MMXTy,
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 7, "palignr", 7) == 0) {
+ if (FTy->getReturnType() == X86_MMXTy &&
+ FTy->getParamType(0) == X86_MMXTy &&
+ FTy->getParamType(1) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ X86_MMXTy,
+ X86_MMXTy,
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 5, "pextr", 5) == 0) {
+ if (FTy->getParamType(0) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ X86_MMXTy,
+ FTy->getParamType(1),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 5, "pinsr", 5) == 0) {
+ if (FTy->getReturnType() == X86_MMXTy &&
+ FTy->getParamType(0) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ X86_MMXTy,
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
+ if (FTy->getReturnType() == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ FTy->getParamType(0),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
+ if (FTy->getParamType(0) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ FTy->getReturnType(),
+ X86_MMXTy,
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 8, "vec.init", 8) == 0) {
+ if (FTy->getReturnType() == X86_MMXTy)
+ break;
+
+ F->setName("");
+
+ if (Name.compare(21, 2, ".b", 2) == 0)
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(3),
+ FTy->getParamType(4),
+ FTy->getParamType(5),
+ FTy->getParamType(6),
+ FTy->getParamType(7),
+ (Type*)0));
+ else if (Name.compare(21, 2, ".w", 2) == 0)
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(3),
+ (Type*)0));
+ else if (Name.compare(21, 2, ".d", 2) == 0)
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ FTy->getParamType(0),
+ FTy->getParamType(1),
+ (Type*)0));
+ return true;
+ }
+
+
+ if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
+ if (FTy->getReturnType() == X86_MMXTy &&
+ FTy->getParamType(0) == X86_MMXTy)
+ break;
+
+ F->setName("");
+ NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ X86_MMXTy,
+ X86_MMXTy,
+ FTy->getParamType(1),
+ (Type*)0));
+ return true;
+ }
+
+ if (Name.compare(13, 9, "emms", 4) == 0 ||
+ Name.compare(13, 9, "femms", 5) == 0) {
+ NewFn = 0;
break;
-
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- F->setName("");
+ }
- assert(FTy->getNumParams() == 2 && "MMX shift intrinsics take 2 args!");
-
- // Now construct the new intrinsic with the correct name and type. We
- // leave the old function around in order to query its type, whatever it
- // may be, and correctly convert up to the new type.
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- FTy->getParamType(0),
- VT,
- (Type *)0));
- return true;
+ // We really shouldn't get here ever.
+ assert(0 && "Invalid MMX intrinsic!");
+ break;
} else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
@@ -341,6 +528,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// or 0.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
+ // This is an SSE/MMX instruction.
+ const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
+ NewFn =
+ cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
+ X86_MMXTy,
+ X86_MMXTy,
+ Type::getInt8Ty(F->getContext()),
+ (Type*)0));
+ return true;
}
break;
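[editor's note — illustrative, not part of the diff] The getOrInsertFunction call above effectively produces the declaration
    declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
replacing llvm.x86.ssse3.pshuf.w, whose second operand was wider (hence the trunc to i8 in the call-upgrade code later in this file).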
@@ -432,6 +629,39 @@ static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
"upgraded."+CI->getName(), CI);
}
+/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
+static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
+ Value **Operands, unsigned NumOps,
+ bool AssignName = true) {
+ // Construct a new CallInst.
+ CallInst *NewCI =
+ CallInst::Create(NewFn, Operands, Operands + NumOps,
+ AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
+
+ NewCI->setTailCall(OldCI->isTailCall());
+ NewCI->setCallingConv(OldCI->getCallingConv());
+
+ // Handle any uses of the old CallInst. If the type has changed, add a cast.
+ if (!OldCI->use_empty()) {
+ if (OldCI->getType() != NewCI->getType()) {
+ Function *OldFn = OldCI->getCalledFunction();
+ CastInst *RetCast =
+ CastInst::Create(CastInst::getCastOpcode(NewCI, true,
+ OldFn->getReturnType(), true),
+ NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
+
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ OldCI->replaceAllUsesWith(RetCast);
+ } else {
+ OldCI->replaceAllUsesWith(NewCI);
+ }
+ }
+
+ // Clean up the old call now that it has been completely upgraded.
+ OldCI->eraseFromParent();
+}
+
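[editor's note — minimal usage sketch, not part of the diff; it mirrors the MMX cases added below] A typical call site for the helper above rebuilds the first operand as x86_mmx, keeps the immediate, and lets the helper splice in the new call and erase the old one:
    Value *Ops[2];
    Ops[0] = new BitCastInst(CI->getArgOperand(0),
                             NewFn->getFunctionType()->getParamType(0),
                             "upgraded.", CI);
    Ops[1] = CI->getArgOperand(1);
    ConstructNewCallInst(NewFn, CI, Ops, 2);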
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
@@ -629,7 +859,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned i = 0; i != 8; ++i)
Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
- Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+ Value *SV = ConstantVector::get(Indices);
Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
}
@@ -685,7 +915,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned i = 0; i != 16; ++i)
Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
- Value *SV = ConstantVector::get(Indices.begin(), Indices.size());
+ Value *SV = ConstantVector::get(Indices);
Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
}
@@ -759,40 +989,265 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
break;
}
+ case Intrinsic::x86_mmx_padd_b:
+ case Intrinsic::x86_mmx_padd_w:
+ case Intrinsic::x86_mmx_padd_d:
+ case Intrinsic::x86_mmx_padd_q:
+ case Intrinsic::x86_mmx_padds_b:
+ case Intrinsic::x86_mmx_padds_w:
+ case Intrinsic::x86_mmx_paddus_b:
+ case Intrinsic::x86_mmx_paddus_w:
+ case Intrinsic::x86_mmx_psub_b:
+ case Intrinsic::x86_mmx_psub_w:
+ case Intrinsic::x86_mmx_psub_d:
+ case Intrinsic::x86_mmx_psub_q:
+ case Intrinsic::x86_mmx_psubs_b:
+ case Intrinsic::x86_mmx_psubs_w:
+ case Intrinsic::x86_mmx_psubus_b:
+ case Intrinsic::x86_mmx_psubus_w:
+ case Intrinsic::x86_mmx_pmulh_w:
+ case Intrinsic::x86_mmx_pmull_w:
+ case Intrinsic::x86_mmx_pmulhu_w:
+ case Intrinsic::x86_mmx_pmulu_dq:
+ case Intrinsic::x86_mmx_pmadd_wd:
+ case Intrinsic::x86_mmx_pand:
+ case Intrinsic::x86_mmx_pandn:
+ case Intrinsic::x86_mmx_por:
+ case Intrinsic::x86_mmx_pxor:
+ case Intrinsic::x86_mmx_pavg_b:
+ case Intrinsic::x86_mmx_pavg_w:
+ case Intrinsic::x86_mmx_pmaxu_b:
+ case Intrinsic::x86_mmx_pmaxs_w:
+ case Intrinsic::x86_mmx_pminu_b:
+ case Intrinsic::x86_mmx_pmins_w:
+ case Intrinsic::x86_mmx_psad_bw:
+ case Intrinsic::x86_mmx_psll_w:
case Intrinsic::x86_mmx_psll_d:
case Intrinsic::x86_mmx_psll_q:
- case Intrinsic::x86_mmx_psll_w:
- case Intrinsic::x86_mmx_psra_d:
- case Intrinsic::x86_mmx_psra_w:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrl_w:
case Intrinsic::x86_mmx_psrl_d:
case Intrinsic::x86_mmx_psrl_q:
- case Intrinsic::x86_mmx_psrl_w: {
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psra_w:
+ case Intrinsic::x86_mmx_psra_d:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d:
+ case Intrinsic::x86_mmx_packsswb:
+ case Intrinsic::x86_mmx_packssdw:
+ case Intrinsic::x86_mmx_packuswb:
+ case Intrinsic::x86_mmx_punpckhbw:
+ case Intrinsic::x86_mmx_punpckhwd:
+ case Intrinsic::x86_mmx_punpckhdq:
+ case Intrinsic::x86_mmx_punpcklbw:
+ case Intrinsic::x86_mmx_punpcklwd:
+ case Intrinsic::x86_mmx_punpckldq:
+ case Intrinsic::x86_mmx_pcmpeq_b:
+ case Intrinsic::x86_mmx_pcmpeq_w:
+ case Intrinsic::x86_mmx_pcmpeq_d:
+ case Intrinsic::x86_mmx_pcmpgt_b:
+ case Intrinsic::x86_mmx_pcmpgt_w:
+ case Intrinsic::x86_mmx_pcmpgt_d: {
Value *Operands[2];
+ // Cast the operand to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+
+ switch (NewFn->getIntrinsicID()) {
+ default:
+ // Cast to the X86 MMX type.
+ Operands[1] = new BitCastInst(CI->getArgOperand(1),
+ NewFn->getFunctionType()->getParamType(1),
+ "upgraded.", CI);
+ break;
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d:
+ // These take an i32 as their second parameter.
+ Operands[1] = CI->getArgOperand(1);
+ break;
+ }
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2);
+ break;
+ }
+ case Intrinsic::x86_mmx_maskmovq: {
+ Value *Operands[3];
+
+ // Cast the operands to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+ Operands[1] = new BitCastInst(CI->getArgOperand(1),
+ NewFn->getFunctionType()->getParamType(1),
+ "upgraded.", CI);
+ Operands[2] = CI->getArgOperand(2);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 3, false);
+ break;
+ }
+ case Intrinsic::x86_mmx_pmovmskb: {
+ Value *Operands[1];
+
+ // Cast the operand to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 1);
+ break;
+ }
+ case Intrinsic::x86_mmx_movnt_dq: {
+ Value *Operands[2];
+
Operands[0] = CI->getArgOperand(0);
-
- // Cast the second parameter to the correct type.
- BitCastInst *BC = new BitCastInst(CI->getArgOperand(1),
- NewFn->getFunctionType()->getParamType(1),
- "upgraded.", CI);
- Operands[1] = BC;
-
- // Construct a new CallInst
- CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+2,
- "upgraded."+CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
+
+ // Cast the operand to the X86 MMX type.
+ Operands[1] = new BitCastInst(CI->getArgOperand(1),
+ NewFn->getFunctionType()->getParamType(1),
+ "upgraded.", CI);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2, false);
break;
- }
+ }
+ case Intrinsic::x86_mmx_palignr_b: {
+ Value *Operands[3];
+
+ // Cast the operands to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+ Operands[1] = new BitCastInst(CI->getArgOperand(1),
+ NewFn->getFunctionType()->getParamType(1),
+ "upgraded.", CI);
+ Operands[2] = CI->getArgOperand(2);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 3);
+ break;
+ }
+ case Intrinsic::x86_mmx_pextr_w: {
+ Value *Operands[2];
+
+ // Cast the operands to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+ Operands[1] = CI->getArgOperand(1);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2);
+ break;
+ }
+ case Intrinsic::x86_mmx_pinsr_w: {
+ Value *Operands[3];
+
+ // Cast the operands to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+ Operands[1] = CI->getArgOperand(1);
+ Operands[2] = CI->getArgOperand(2);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 3);
+ break;
+ }
+ case Intrinsic::x86_sse_pshuf_w: {
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
+
+ // Cast the operand to the X86 MMX type.
+ Value *Operands[2];
+ Operands[0] =
+ Builder.CreateBitCast(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.");
+ Operands[1] =
+ Builder.CreateTrunc(CI->getArgOperand(1),
+ Type::getInt8Ty(C),
+ "upgraded.");
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2);
+ break;
+ }
+
+#if 0
+ case Intrinsic::x86_mmx_cvtsi32_si64: {
+ // The return type needs to be changed.
+ Value *Operands[1];
+ Operands[0] = CI->getArgOperand(0);
+ ConstructNewCallInst(NewFn, CI, Operands, 1);
+ break;
+ }
+ case Intrinsic::x86_mmx_cvtsi64_si32: {
+ Value *Operands[1];
+
+ // Cast the operand to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 1);
+ break;
+ }
+ case Intrinsic::x86_mmx_vec_init_b:
+ case Intrinsic::x86_mmx_vec_init_w:
+ case Intrinsic::x86_mmx_vec_init_d: {
+ // The return type needs to be changed.
+ Value *Operands[8];
+ unsigned NumOps = 0;
+
+ switch (NewFn->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_mmx_vec_init_b: NumOps = 8; break;
+ case Intrinsic::x86_mmx_vec_init_w: NumOps = 4; break;
+ case Intrinsic::x86_mmx_vec_init_d: NumOps = 2; break;
+ }
+
+ switch (NewFn->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::x86_mmx_vec_init_b:
+ Operands[7] = CI->getArgOperand(7);
+ Operands[6] = CI->getArgOperand(6);
+ Operands[5] = CI->getArgOperand(5);
+ Operands[4] = CI->getArgOperand(4);
+ // FALLTHRU
+ case Intrinsic::x86_mmx_vec_init_w:
+ Operands[3] = CI->getArgOperand(3);
+ Operands[2] = CI->getArgOperand(2);
+ // FALLTHRU
+ case Intrinsic::x86_mmx_vec_init_d:
+ Operands[1] = CI->getArgOperand(1);
+ Operands[0] = CI->getArgOperand(0);
+ break;
+ }
+
+ ConstructNewCallInst(NewFn, CI, Operands, NumOps);
+ break;
+ }
+ case Intrinsic::x86_mmx_vec_ext_d: {
+ Value *Operands[2];
+
+ // Cast the operand to the X86 MMX type.
+ Operands[0] = new BitCastInst(CI->getArgOperand(0),
+ NewFn->getFunctionType()->getParamType(0),
+ "upgraded.", CI);
+ Operands[1] = CI->getArgOperand(1);
+
+ ConstructNewCallInst(NewFn, CI, Operands, 2);
+ break;
+ }
+#endif
+
case Intrinsic::ctlz:
case Intrinsic::ctpop:
case Intrinsic::cttz: {
diff --git a/contrib/llvm/lib/VMCore/BasicBlock.cpp b/contrib/llvm/lib/VMCore/BasicBlock.cpp
index 8ad5373..955a028 100644
--- a/contrib/llvm/lib/VMCore/BasicBlock.cpp
+++ b/contrib/llvm/lib/VMCore/BasicBlock.cpp
@@ -248,10 +248,11 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
// If all incoming values to the Phi are the same, we can replace the Phi
// with that value.
Value* PNV = 0;
- if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
+ if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (PNV != PN) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
+ }
}
}
}
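[editor's note — illustrative, not part of the diff] The new PNV != PN guard covers the case where hasConstantValue() can hand back the PHI node itself, e.g. a degenerate self-referential PHI such as
    loop:
      %p = phi i32 [ %p, %loop ]
Replacing such a node with itself and then erasing it would leave dangling uses, so the code now skips the replace-and-erase in that case.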
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.cpp b/contrib/llvm/lib/VMCore/ConstantFold.cpp
index 9a91daf..573efb7 100644
--- a/contrib/llvm/lib/VMCore/ConstantFold.cpp
+++ b/contrib/llvm/lib/VMCore/ConstantFold.cpp
@@ -42,6 +42,10 @@ using namespace llvm;
/// input vector constant are all simple integer or FP values.
static Constant *BitCastConstantVector(ConstantVector *CV,
const VectorType *DstTy) {
+
+ if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
+ if (CV->isNullValue()) return Constant::getNullValue(DstTy);
+
// If this cast changes element count then we can't handle it here:
// doing so requires endianness information. This should be handled by
// Analysis/ConstantFolding.cpp
@@ -145,7 +149,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
// This allows for other simplifications (although some of them
// can only be handled by Analysis/ConstantFolding.cpp).
if (isa<ConstantInt>(V) || isa<ConstantFP>(V))
- return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy);
+ return ConstantExpr::getBitCast(ConstantVector::get(V), DestPTy);
}
// Finally, implement bitcast folding now. The code below doesn't handle
@@ -202,7 +206,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
APInt V = CI->getValue();
if (ByteStart)
V = V.lshr(ByteStart*8);
- V.trunc(ByteSize*8);
+ V = V.trunc(ByteSize*8);
return ConstantInt::get(CI->getContext(), V);
}
@@ -511,10 +515,14 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
return Constant::getNullValue(DestTy);
return UndefValue::get(DestTy);
}
+
// No compile-time operations on this type yet.
if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty())
return 0;
+ if (V->isNullValue() && !DestTy->isX86_MMXTy())
+ return Constant::getNullValue(DestTy);
+
// If the cast operand is a constant expression, there's a few things we can
// do to try to simplify it.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -637,9 +645,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
case Instruction::SIToFP:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt api = CI->getValue();
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(DestTy->getPrimitiveSizeInBits(),
- 2, zero));
+ APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true);
(void)apf.convertFromAPInt(api,
opc==Instruction::SIToFP,
APFloat::rmNearestTiesToEven);
@@ -649,25 +655,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
case Instruction::ZExt:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- APInt Result(CI->getValue());
- Result.zext(BitWidth);
- return ConstantInt::get(V->getContext(), Result);
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().zext(BitWidth));
}
return 0;
case Instruction::SExt:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
- APInt Result(CI->getValue());
- Result.sext(BitWidth);
- return ConstantInt::get(V->getContext(), Result);
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().sext(BitWidth));
}
return 0;
case Instruction::Trunc: {
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt Result(CI->getValue());
- Result.trunc(DestBitWidth);
- return ConstantInt::get(V->getContext(), Result);
+ return ConstantInt::get(V->getContext(),
+ CI->getValue().trunc(DestBitWidth));
}
// The input must be a constantexpr. See if we can simplify this based on
@@ -690,10 +693,58 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
return CB->getZExtValue() ? V1 : V2;
+ // Check for zero aggregate and ConstantVector of zeros
+ if (Cond->isNullValue()) return V2;
+
+ if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
+
+ if (CondV->isAllOnesValue()) return V1;
+
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
+ ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
+
+ if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
+ (CP2 || isa<ConstantAggregateZero>(V2))) {
+
+ // Find the element type of the returned vector
+ const Type *EltTy = VTy->getElementType();
+ unsigned NumElem = VTy->getNumElements();
+ std::vector<Constant*> Res(NumElem);
+
+ bool Valid = true;
+ for (unsigned i = 0; i < NumElem; ++i) {
+ ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (!c) {
+ Valid = false;
+ break;
+ }
+ Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
+ Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
+ Res[i] = c->getZExtValue() ? C1 : C2;
+ }
+ // If we were able to build the vector, return it
+ if (Valid) return ConstantVector::get(Res);
+ }
+ }
+
+
if (isa<UndefValue>(V1)) return V2;
if (isa<UndefValue>(V2)) return V1;
if (isa<UndefValue>(Cond)) return V1;
if (V1 == V2) return V1;
+
+ if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
+ if (TrueVal->getOpcode() == Instruction::Select)
+ if (TrueVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
+ }
+ if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
+ if (FalseVal->getOpcode() == Instruction::Select)
+ if (FalseVal->getOperand(0) == Cond)
+ return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
+ }
+
return 0;
}
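[editor's note — worked example, not part of the diff] For the vector-select fold added above: with Cond = <2 x i1> <i1 true, i1 false>, V1 = <2 x i32> <i32 1, i32 2> and V2 = <2 x i32> <i32 3, i32 4>, the per-element loop picks C1 where the condition bit is 1 and C2 where it is 0, so the fold returns <2 x i32> <i32 1, i32 4>.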
@@ -821,7 +872,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
Result.push_back(InElt);
}
- return ConstantVector::get(&Result[0], Result.size());
+ return ConstantVector::get(Result);
}
Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
@@ -982,8 +1033,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return Constant::getNullValue(C1->getType()); // X lshr undef -> 0
// undef lshr X -> 0
case Instruction::AShr:
- if (!isa<UndefValue>(C2))
- return C1; // undef ashr X --> undef
+ if (!isa<UndefValue>(C2)) // undef ashr X --> all ones
+ return Constant::getAllOnesValue(C1->getType());
else if (isa<UndefValue>(C1))
return C1; // undef ashr undef -> undef
else
@@ -1343,8 +1394,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
// Given ((a + b) + c), if (b + c) folds to something interesting, return
// (a + (b + c)).
- if (Instruction::isAssociative(Opcode, C1->getType()) &&
- CE1->getOpcode() == Opcode) {
+ if (Instruction::isAssociative(Opcode) && CE1->getOpcode() == Opcode) {
Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2);
if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode)
return ConstantExpr::get(Opcode, CE1->getOperand(0), T);
@@ -1413,7 +1463,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) {
/// first is less than the second, return -1, if the second is less than the
/// first, return 1. If the constants are not integral, return -2.
///
-static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
+static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) {
if (C1 == C2) return 0;
// Ok, we found a different index. If they are not ConstantInt, we can't do
@@ -1896,11 +1946,11 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// If we can constant fold the comparison of each element, constant fold
// the whole vector comparison.
SmallVector<Constant*, 4> ResElts;
- for (unsigned i = 0, e = C1Elts.size(); i != e; ++i) {
- // Compare the elements, producing an i1 result or constant expr.
+ // Compare the elements, producing an i1 result or constant expr.
+ for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
- }
- return ConstantVector::get(&ResElts[0], ResElts.size());
+
+ return ConstantVector::get(ResElts);
}
if (C1->getType()->isFloatingPointTy()) {
@@ -1948,7 +1998,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
else if (pred == FCmpInst::FCMP_UGT || pred == FCmpInst::FCMP_OGT)
Result = 1;
break;
- case ICmpInst::ICMP_NE: // We know that C1 != C2
+ case FCmpInst::FCMP_ONE: // We know that C1 != C2
// We can only partially decide this relation.
if (pred == FCmpInst::FCMP_OEQ || pred == FCmpInst::FCMP_UEQ)
Result = 0;
@@ -2073,56 +2123,55 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
/// is "inbounds".
-static bool isInBoundsIndices(Constant *const *Idxs, size_t NumIdx) {
+template<typename IndexTy>
+static bool isInBoundsIndices(IndexTy const *Idxs, size_t NumIdx) {
// No indices means nothing that could be out of bounds.
if (NumIdx == 0) return true;
// If the first index is zero, it's in bounds.
- if (Idxs[0]->isNullValue()) return true;
+ if (cast<Constant>(Idxs[0])->isNullValue()) return true;
// If the first index is one and all the rest are zero, it's in bounds,
// by the one-past-the-end rule.
if (!cast<ConstantInt>(Idxs[0])->isOne())
return false;
for (unsigned i = 1, e = NumIdx; i != e; ++i)
- if (!Idxs[i]->isNullValue())
+ if (!cast<Constant>(Idxs[i])->isNullValue())
return false;
return true;
}
-Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
- bool inBounds,
- Constant* const *Idxs,
- unsigned NumIdx) {
+template<typename IndexTy>
+static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
+ bool inBounds,
+ IndexTy const *Idxs,
+ unsigned NumIdx) {
+ Constant *Idx0 = cast<Constant>(Idxs[0]);
if (NumIdx == 0 ||
- (NumIdx == 1 && Idxs[0]->isNullValue()))
+ (NumIdx == 1 && Idx0->isNullValue()))
return C;
if (isa<UndefValue>(C)) {
const PointerType *Ptr = cast<PointerType>(C->getType());
- const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
- (Value **)Idxs,
- (Value **)Idxs+NumIdx);
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs, Idxs+NumIdx);
assert(Ty != 0 && "Invalid indices for GEP!");
return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
}
- Constant *Idx0 = Idxs[0];
if (C->isNullValue()) {
bool isNull = true;
for (unsigned i = 0, e = NumIdx; i != e; ++i)
- if (!Idxs[i]->isNullValue()) {
+ if (!cast<Constant>(Idxs[i])->isNullValue()) {
isNull = false;
break;
}
if (isNull) {
const PointerType *Ptr = cast<PointerType>(C->getType());
- const Type *Ty = GetElementPtrInst::getIndexedType(Ptr,
- (Value**)Idxs,
- (Value**)Idxs+NumIdx);
+ const Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs,
+ Idxs+NumIdx);
assert(Ty != 0 && "Invalid indices for GEP!");
- return ConstantPointerNull::get(
- PointerType::get(Ty,Ptr->getAddressSpace()));
+ return ConstantPointerNull::get(PointerType::get(Ty,
+ Ptr->getAddressSpace()));
}
}
@@ -2173,9 +2222,9 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
}
// Implement folding of:
- // int* getelementptr ([2 x int]* bitcast ([3 x int]* %X to [2 x int]*),
- // long 0, long 0)
- // To: int* getelementptr ([3 x int]* %X, long 0, long 0)
+ // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
+ // i64 0, i64 0)
+ // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
//
if (CE->isCast() && NumIdx > 1 && Idx0->isNullValue()) {
if (const PointerType *SPT =
@@ -2214,7 +2263,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
ATy->getNumElements());
NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
- Constant *PrevIdx = Idxs[i-1];
+ Constant *PrevIdx = cast<Constant>(Idxs[i-1]);
Constant *Div = ConstantExpr::getSDiv(CI, Factor);
// Before adding, extend both operands to i64 to avoid
@@ -2242,7 +2291,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
// If we did any factoring, start over with the adjusted indices.
if (!NewIdxs.empty()) {
for (unsigned i = 0; i != NumIdx; ++i)
- if (!NewIdxs[i]) NewIdxs[i] = Idxs[i];
+ if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
return inBounds ?
ConstantExpr::getInBoundsGetElementPtr(C, NewIdxs.data(),
NewIdxs.size()) :
@@ -2257,3 +2306,17 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
return 0;
}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
+
+Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
+ bool inBounds,
+ Value* const *Idxs,
+ unsigned NumIdx) {
+ return ConstantFoldGetElementPtrImpl(C, inBounds, Idxs, NumIdx);
+}
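[editor's note — sketch only; BaseC and Ctx are hypothetical] Both public overloads now forward to the shared template, so folding behaves identically whichever index representation the caller holds:
    Constant *Idx = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
    Constant *Folded = ConstantFoldGetElementPtr(BaseC, /*inBounds=*/true,
                                                 &Idx, 1);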
diff --git a/contrib/llvm/lib/VMCore/ConstantFold.h b/contrib/llvm/lib/VMCore/ConstantFold.h
index d2dbbdd..0ecd7b4 100644
--- a/contrib/llvm/lib/VMCore/ConstantFold.h
+++ b/contrib/llvm/lib/VMCore/ConstantFold.h
@@ -49,6 +49,8 @@ namespace llvm {
Constant *C1, Constant *C2);
Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
Constant* const *Idxs, unsigned NumIdx);
+ Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds,
+ Value* const *Idxs, unsigned NumIdx);
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/VMCore/Constants.cpp b/contrib/llvm/lib/VMCore/Constants.cpp
index 16eaca8..246fde1 100644
--- a/contrib/llvm/lib/VMCore/Constants.cpp
+++ b/contrib/llvm/lib/VMCore/Constants.cpp
@@ -40,22 +40,25 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Constructor to create a '0' constant of arbitrary type...
-static const uint64_t zero[2] = {0, 0};
Constant *Constant::getNullValue(const Type *Ty) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
case Type::FloatTyID:
- return ConstantFP::get(Ty->getContext(), APFloat(APInt(32, 0)));
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEsingle));
case Type::DoubleTyID:
- return ConstantFP::get(Ty->getContext(), APFloat(APInt(64, 0)));
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEdouble));
case Type::X86_FP80TyID:
- return ConstantFP::get(Ty->getContext(), APFloat(APInt(80, 2, zero)));
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::x87DoubleExtended));
case Type::FP128TyID:
return ConstantFP::get(Ty->getContext(),
- APFloat(APInt(128, 2, zero), true));
+ APFloat::getZero(APFloat::IEEEquad));
case Type::PPC_FP128TyID:
- return ConstantFP::get(Ty->getContext(), APFloat(APInt(128, 2, zero)));
+ return ConstantFP::get(Ty->getContext(),
+ APFloat(APInt::getNullValue(128)));
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
@@ -69,7 +72,7 @@ Constant *Constant::getNullValue(const Type *Ty) {
}
}
-Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
+Constant *Constant::getIntegerValue(const Type *Ty, const APInt &V) {
const Type *ScalarTy = Ty->getScalarType();
// Create the base integer constant.
@@ -86,12 +89,18 @@ Constant* Constant::getIntegerValue(const Type *Ty, const APInt &V) {
return C;
}
-Constant* Constant::getAllOnesValue(const Type *Ty) {
+Constant *Constant::getAllOnesValue(const Type *Ty) {
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ConstantInt::get(Ty->getContext(),
APInt::getAllOnesValue(ITy->getBitWidth()));
-
- std::vector<Constant*> Elts;
+
+ if (Ty->isFloatingPointTy()) {
+ APFloat FL = APFloat::getAllOnesValue(Ty->getPrimitiveSizeInBits(),
+ !Ty->isPPC_FP128Ty());
+ return ConstantFP::get(Ty->getContext(), FL);
+ }
+
+ SmallVector<Constant*, 16> Elts;
const VectorType *VTy = cast<VectorType>(Ty);
Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
assert(Elts[0] && "Not a vector integer type!");
@@ -253,6 +262,59 @@ void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
}
+/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
+/// it. This involves recursively eliminating any dead users of the
+/// constantexpr.
+static bool removeDeadUsersOfConstant(const Constant *C) {
+ if (isa<GlobalValue>(C)) return false; // Cannot remove this
+
+ while (!C->use_empty()) {
+ const Constant *User = dyn_cast<Constant>(C->use_back());
+ if (!User) return false; // Non-constant usage;
+ if (!removeDeadUsersOfConstant(User))
+ return false; // Constant wasn't dead
+ }
+
+ const_cast<Constant*>(C)->destroyConstant();
+ return true;
+}
+
+
+/// removeDeadConstantUsers - If there are any dead constant users dangling
+/// off of this constant, remove them. This method is useful for clients
+/// that want to check to see if a global is unused, but don't want to deal
+/// with potentially dead constants hanging off of the globals.
+void Constant::removeDeadConstantUsers() const {
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
+ if (User == 0) {
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ if (!removeDeadUsersOfConstant(User)) {
+ // If the constant wasn't dead, remember that this was the last live use
+ // and move on to the next constant.
+ LastNonDeadUser = I;
+ ++I;
+ continue;
+ }
+
+ // If the constant was dead, then the iterator is invalidated.
+ if (LastNonDeadUser == E) {
+ I = use_begin();
+ if (I == E) break;
+ } else {
+ I = LastNonDeadUser;
+ ++I;
+ }
+ }
+}
+
+
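[editor's note — usage sketch, not part of the diff; GV is a hypothetical global] The new helper is typically paired with a liveness check:
    GV->removeDeadConstantUsers();     // drop dead ConstantExpr users first
    if (GV->use_empty() && GV->hasLocalLinkage())
      GV->eraseFromParent();           // now provably unreferenced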
//===----------------------------------------------------------------------===//
// ConstantInt
@@ -265,20 +327,16 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V)
ConstantInt* ConstantInt::getTrue(LLVMContext &Context) {
LLVMContextImpl *pImpl = Context.pImpl;
- if (pImpl->TheTrueVal)
- return pImpl->TheTrueVal;
- else
- return (pImpl->TheTrueVal =
- ConstantInt::get(IntegerType::get(Context, 1), 1));
+ if (!pImpl->TheTrueVal)
+ pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1);
+ return pImpl->TheTrueVal;
}
ConstantInt* ConstantInt::getFalse(LLVMContext &Context) {
LLVMContextImpl *pImpl = Context.pImpl;
- if (pImpl->TheFalseVal)
- return pImpl->TheFalseVal;
- else
- return (pImpl->TheFalseVal =
- ConstantInt::get(IntegerType::get(Context, 1), 0));
+ if (!pImpl->TheFalseVal)
+ pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0);
+ return pImpl->TheFalseVal;
}
@@ -297,14 +355,14 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) {
return Slot;
}
-Constant* ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
+Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) {
Constant *C = get(cast<IntegerType>(Ty->getScalarType()),
V, isSigned);
// For vectors, broadcast the value.
if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- std::vector<Constant *>(VTy->getNumElements(), C));
+ return ConstantVector::get(SmallVector<Constant*,
+ 16>(VTy->getNumElements(), C));
return C;
}
@@ -322,7 +380,7 @@ Constant *ConstantInt::getSigned(const Type *Ty, int64_t V) {
return get(Ty, V, true);
}
-Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(const Type* Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
@@ -330,7 +388,7 @@ Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
// For vectors, broadcast the value.
if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
- std::vector<Constant *>(VTy->getNumElements(), C));
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
return C;
}
@@ -361,7 +419,7 @@ static const fltSemantics *TypeToFloatSemantics(const Type *Ty) {
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant* ConstantFP::get(const Type* Ty, double V) {
+Constant *ConstantFP::get(const Type* Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
@@ -373,13 +431,13 @@ Constant* ConstantFP::get(const Type* Ty, double V) {
// For vectors, broadcast the value.
if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
- std::vector<Constant *>(VTy->getNumElements(), C));
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
return C;
}
-Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(const Type* Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -388,7 +446,7 @@ Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
// For vectors, broadcast the value.
if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
return ConstantVector::get(
- std::vector<Constant *>(VTy->getNumElements(), C));
+ SmallVector<Constant *, 16>(VTy->getNumElements(), C));
return C;
}
@@ -402,12 +460,12 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) {
}
-Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) {
+Constant *ConstantFP::getZeroValueForNegation(const Type* Ty) {
if (const VectorType *PTy = dyn_cast<VectorType>(Ty))
if (PTy->getElementType()->isFloatingPointTy()) {
- std::vector<Constant*> zeros(PTy->getNumElements(),
+ SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
getNegativeZero(PTy->getElementType()));
- return ConstantVector::get(PTy, zeros);
+ return ConstantVector::get(zeros);
}
if (Ty->isFloatingPointTy())
@@ -510,7 +568,7 @@ Constant *ConstantArray::get(const ArrayType *Ty,
}
-Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
+Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
unsigned NumVals) {
// FIXME: make this the primary ctor method.
return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
@@ -522,7 +580,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
/// Otherwise, the length parameter specifies how much of the string to use
/// and it won't be null terminated.
///
-Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
+Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector<Constant*> ElementVals;
ElementVals.reserve(Str.size() + size_t(AddNull));
@@ -558,7 +616,7 @@ ConstantStruct::ConstantStruct(const StructType *T,
}
// ConstantStruct accessors.
-Constant* ConstantStruct::get(const StructType* T,
+Constant *ConstantStruct::get(const StructType* T,
const std::vector<Constant*>& V) {
LLVMContextImpl* pImpl = T->getContext().pImpl;
@@ -570,7 +628,7 @@ Constant* ConstantStruct::get(const StructType* T,
return ConstantAggregateZero::get(T);
}
-Constant* ConstantStruct::get(LLVMContext &Context,
+Constant *ConstantStruct::get(LLVMContext &Context,
const std::vector<Constant*>& V, bool packed) {
std::vector<const Type*> StructEls;
StructEls.reserve(V.size());
@@ -579,8 +637,8 @@ Constant* ConstantStruct::get(LLVMContext &Context,
return get(StructType::get(Context, StructEls, packed), V);
}
-Constant* ConstantStruct::get(LLVMContext &Context,
- Constant* const *Vals, unsigned NumVals,
+Constant *ConstantStruct::get(LLVMContext &Context,
+ Constant *const *Vals, unsigned NumVals,
bool Packed) {
// FIXME: make this the primary ctor method.
return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
@@ -592,23 +650,22 @@ ConstantVector::ConstantVector(const VectorType *T,
OperandTraits<ConstantVector>::op_end(this) - V.size(),
V.size()) {
Use *OL = OperandList;
- for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
- I != E; ++I, ++OL) {
- Constant *C = *I;
- assert(C->getType() == T->getElementType() &&
+ for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
+ I != E; ++I, ++OL) {
+ Constant *C = *I;
+ assert(C->getType() == T->getElementType() &&
"Initializer for vector element doesn't match vector element type!");
*OL = C;
}
}
// ConstantVector accessors.
-Constant* ConstantVector::get(const VectorType* T,
- const std::vector<Constant*>& V) {
- assert(!V.empty() && "Vectors can't be empty");
- LLVMContext &Context = T->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
-
- // If this is an all-undef or alll-zero vector, return a
+Constant *ConstantVector::get(const VectorType *T,
+ const std::vector<Constant*> &V) {
+ assert(!V.empty() && "Vectors can't be empty");
+ LLVMContextImpl *pImpl = T->getContext().pImpl;
+
+ // If this is an all-undef or all-zero vector, return a
// ConstantAggregateZero or UndefValue.
Constant *C = V[0];
bool isZero = C->isNullValue();
@@ -630,61 +687,10 @@ Constant* ConstantVector::get(const VectorType* T,
return pImpl->VectorConstants.getOrCreate(T, V);
}
-Constant* ConstantVector::get(const std::vector<Constant*>& V) {
- assert(!V.empty() && "Cannot infer type if V is empty");
- return get(VectorType::get(V.front()->getType(),V.size()), V);
-}
-
-Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) {
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
// FIXME: make this the primary ctor method.
- return get(std::vector<Constant*>(Vals, Vals+NumVals));
-}
-
-Constant* ConstantExpr::getNSWNeg(Constant* C) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- "Cannot NEG a nonintegral value!");
- return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
-}
-
-Constant* ConstantExpr::getNUWNeg(Constant* C) {
- assert(C->getType()->isIntOrIntVectorTy() &&
- "Cannot NEG a nonintegral value!");
- return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
-}
-
-Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Add, C1, C2,
- OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWAdd(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Add, C1, C2,
- OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Sub, C1, C2,
- OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWSub(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Sub, C1, C2,
- OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Mul, C1, C2,
- OverflowingBinaryOperator::NoSignedWrap);
-}
-
-Constant* ConstantExpr::getNUWMul(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::Mul, C1, C2,
- OverflowingBinaryOperator::NoUnsignedWrap);
-}
-
-Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) {
- return getTy(C1->getType(), Instruction::SDiv, C1, C2,
- SDivOperator::IsExact);
+ assert(!V.empty() && "Vectors cannot be empty");
+ return get(VectorType::get(V.front()->getType(), V.size()), V.vec());
}
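[editor's note — sketch of the new call pattern, not part of the diff] The ArrayRef overload replaces the removed pointer/length form:
    SmallVector<Constant*, 4> Elts;
    // ... fill Elts with constants of a single element type ...
    Constant *Vec = ConstantVector::get(Elts);  // was get(&Elts[0], Elts.size())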
// Utility function for determining if a ConstantExpr is a CastOp or not. This
@@ -812,7 +818,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
/// operands replaced with the specified values. The specified operands must
/// match count and type with the existing ones.
Constant *ConstantExpr::
-getWithOperands(Constant* const *Ops, unsigned NumOps) const {
+getWithOperands(Constant *const *Ops, unsigned NumOps) const {
assert(NumOps == getNumOperands() && "Operand count mismatch!");
bool AnyChange = false;
for (unsigned i = 0; i != NumOps; ++i) {
@@ -1034,7 +1040,7 @@ bool ConstantVector::isAllOnesValue() const {
/// getSplatValue - If this is a splat constant, where all of the
/// elements have the same value, return that value. Otherwise return null.
-Constant *ConstantVector::getSplatValue() {
+Constant *ConstantVector::getSplatValue() const {
// Check out first element.
Constant *Elt = getOperand(0);
// Then make sure all remaining elements point to the same value.
@@ -1241,7 +1247,7 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
if (SrcBits == DstBits)
return C; // Avoid a useless cast
Instruction::CastOps opcode =
- (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
+ (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt);
return getCast(opcode, C, Ty);
}
@@ -1482,7 +1488,7 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
return getTy(C1->getType(), Opcode, C1, C2, Flags);
}
-Constant* ConstantExpr::getSizeOf(const Type* Ty) {
+Constant *ConstantExpr::getSizeOf(const Type* Ty) {
// sizeof is implemented as: (i64) gep (Ty*)null, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1492,7 +1498,7 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) {
Type::getInt64Ty(Ty->getContext()));
}
-Constant* ConstantExpr::getAlignOf(const Type* Ty) {
+Constant *ConstantExpr::getAlignOf(const Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
const Type *AligningTy = StructType::get(Ty->getContext(),
@@ -1506,12 +1512,12 @@ Constant* ConstantExpr::getAlignOf(const Type* Ty) {
Type::getInt64Ty(Ty->getContext()));
}
-Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) {
return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
FieldNo));
}
-Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
+Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
// offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
// Note that a non-inbounds gep is used, as null isn't within any object.
Constant *GEPIdx[] = {
@@ -1547,44 +1553,17 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
+template<typename IndexTy>
Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
- Value* const *Idxs,
- unsigned NumIdx) {
- assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
- Idxs+NumIdx) ==
- cast<PointerType>(ReqTy)->getElementType() &&
- "GEP indices invalid!");
-
- if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/false,
- (Constant**)Idxs, NumIdx))
- return FC; // Fold a few common cases...
-
- assert(C->getType()->isPointerTy() &&
- "Non-pointer type for constant GetElementPtr expression");
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec;
- ArgVec.reserve(NumIdx+1);
- ArgVec.push_back(C);
- for (unsigned i = 0; i != NumIdx; ++i)
- ArgVec.push_back(cast<Constant>(Idxs[i]));
- const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
-
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
- Constant *C,
- Value *const *Idxs,
- unsigned NumIdx) {
+ IndexTy const *Idxs,
+ unsigned NumIdx, bool InBounds) {
assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
Idxs+NumIdx) ==
cast<PointerType>(ReqTy)->getElementType() &&
"GEP indices invalid!");
- if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/true,
- (Constant**)Idxs, NumIdx))
- return FC; // Fold a few common cases...
+ if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
+ return FC; // Fold a few common cases.
assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
@@ -1595,42 +1574,31 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
for (unsigned i = 0; i != NumIdx; ++i)
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- GEPOperator::IsInBounds);
+ InBounds ? GEPOperator::IsInBounds : 0);
LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
- unsigned NumIdx) {
+template<typename IndexTy>
+Constant *ConstantExpr::getGetElementPtrImpl(Constant *C, IndexTy const *Idxs,
+ unsigned NumIdx, bool InBounds) {
// Get the result type of the getelementptr!
const Type *Ty =
GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
assert(Ty && "GEP indices invalid!");
unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
- return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
+ return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx,InBounds);
}
-Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
- Value* const *Idxs,
- unsigned NumIdx) {
- // Get the result type of the getelementptr!
- const Type *Ty =
- GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
- assert(Ty && "GEP indices invalid!");
- unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
- return getInBoundsGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant* const *Idxs,
- unsigned NumIdx) {
- return getGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+ unsigned NumIdx, bool InBounds) {
+ return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
}
-Constant *ConstantExpr::getInBoundsGetElementPtr(Constant *C,
- Constant* const *Idxs,
- unsigned NumIdx) {
- return getInBoundsGetElementPtr(C, (Value* const *)Idxs, NumIdx);
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant *const *Idxs,
+ unsigned NumIdx, bool InBounds) {
+ return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
}
Constant *
@@ -1804,98 +1772,111 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg,
return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
}
-Constant* ConstantExpr::getNeg(Constant* C) {
+Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
- return get(Instruction::Sub,
- ConstantFP::getZeroValueForNegation(C->getType()),
- C);
+ return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
+ C, HasNUW, HasNSW);
}
-Constant* ConstantExpr::getFNeg(Constant* C) {
+Constant *ConstantExpr::getFNeg(Constant *C) {
assert(C->getType()->isFPOrFPVectorTy() &&
"Cannot FNEG a non-floating-point value!");
- return get(Instruction::FSub,
- ConstantFP::getZeroValueForNegation(C->getType()),
- C);
+ return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
}
-Constant* ConstantExpr::getNot(Constant* C) {
+Constant *ConstantExpr::getNot(Constant *C) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NOT a nonintegral value!");
return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType()));
}
-Constant* ConstantExpr::getAdd(Constant* C1, Constant* C2) {
- return get(Instruction::Add, C1, C2);
+Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Add, C1, C2, Flags);
}
-Constant* ConstantExpr::getFAdd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) {
return get(Instruction::FAdd, C1, C2);
}
-Constant* ConstantExpr::getSub(Constant* C1, Constant* C2) {
- return get(Instruction::Sub, C1, C2);
+Constant *ConstantExpr::getSub(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Sub, C1, C2, Flags);
}
-Constant* ConstantExpr::getFSub(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) {
return get(Instruction::FSub, C1, C2);
}
-Constant* ConstantExpr::getMul(Constant* C1, Constant* C2) {
- return get(Instruction::Mul, C1, C2);
+Constant *ConstantExpr::getMul(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Mul, C1, C2, Flags);
}
-Constant* ConstantExpr::getFMul(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) {
return get(Instruction::FMul, C1, C2);
}
-Constant* ConstantExpr::getUDiv(Constant* C1, Constant* C2) {
- return get(Instruction::UDiv, C1, C2);
+Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::UDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
}
-Constant* ConstantExpr::getSDiv(Constant* C1, Constant* C2) {
- return get(Instruction::SDiv, C1, C2);
+Constant *ConstantExpr::getSDiv(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::SDiv, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
}
-Constant* ConstantExpr::getFDiv(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFDiv(Constant *C1, Constant *C2) {
return get(Instruction::FDiv, C1, C2);
}
-Constant* ConstantExpr::getURem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getURem(Constant *C1, Constant *C2) {
return get(Instruction::URem, C1, C2);
}
-Constant* ConstantExpr::getSRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getSRem(Constant *C1, Constant *C2) {
return get(Instruction::SRem, C1, C2);
}
-Constant* ConstantExpr::getFRem(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getFRem(Constant *C1, Constant *C2) {
return get(Instruction::FRem, C1, C2);
}
-Constant* ConstantExpr::getAnd(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getAnd(Constant *C1, Constant *C2) {
return get(Instruction::And, C1, C2);
}
-Constant* ConstantExpr::getOr(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getOr(Constant *C1, Constant *C2) {
return get(Instruction::Or, C1, C2);
}
-Constant* ConstantExpr::getXor(Constant* C1, Constant* C2) {
+Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
return get(Instruction::Xor, C1, C2);
}
-Constant* ConstantExpr::getShl(Constant* C1, Constant* C2) {
- return get(Instruction::Shl, C1, C2);
+Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
+ bool HasNUW, bool HasNSW) {
+ unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
+ (HasNSW ? OverflowingBinaryOperator::NoSignedWrap : 0);
+ return get(Instruction::Shl, C1, C2, Flags);
}
-Constant* ConstantExpr::getLShr(Constant* C1, Constant* C2) {
- return get(Instruction::LShr, C1, C2);
+Constant *ConstantExpr::getLShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::LShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
}
-Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
- return get(Instruction::AShr, C1, C2);
+Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
+ return get(Instruction::AShr, C1, C2,
+ isExact ? PossiblyExactOperator::IsExact : 0);
}
// destroyConstant - Remove the constant from the constant table...
@@ -2127,7 +2108,8 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
Indices.push_back(Val);
}
Replacement = ConstantExpr::getGetElementPtr(Pointer,
- &Indices[0], Indices.size());
+ &Indices[0], Indices.size(),
+ cast<GEPOperator>(this)->isInBounds());
} else if (getOpcode() == Instruction::ExtractValue) {
Constant *Agg = getOperand(0);
if (Agg == From) Agg = To;
diff --git a/contrib/llvm/lib/VMCore/ConstantsContext.h b/contrib/llvm/lib/VMCore/ConstantsContext.h
index 1c04c3e..ffc673f 100644
--- a/contrib/llvm/lib/VMCore/ConstantsContext.h
+++ b/contrib/llvm/lib/VMCore/ConstantsContext.h
@@ -239,54 +239,64 @@ struct CompareConstantExpr : public ConstantExpr {
};
template <>
-struct OperandTraits<UnaryConstantExpr> : public FixedNumOperandTraits<1> {
+struct OperandTraits<UnaryConstantExpr> :
+ public FixedNumOperandTraits<UnaryConstantExpr, 1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryConstantExpr, Value)
template <>
-struct OperandTraits<BinaryConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<BinaryConstantExpr> :
+ public FixedNumOperandTraits<BinaryConstantExpr, 2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
template <>
-struct OperandTraits<SelectConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<SelectConstantExpr> :
+ public FixedNumOperandTraits<SelectConstantExpr, 3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
template <>
-struct OperandTraits<ExtractElementConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<ExtractElementConstantExpr> :
+ public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
template <>
-struct OperandTraits<InsertElementConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<InsertElementConstantExpr> :
+ public FixedNumOperandTraits<InsertElementConstantExpr, 3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementConstantExpr, Value)
template <>
-struct OperandTraits<ShuffleVectorConstantExpr> : public FixedNumOperandTraits<3> {
+struct OperandTraits<ShuffleVectorConstantExpr> :
+ public FixedNumOperandTraits<ShuffleVectorConstantExpr, 3> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorConstantExpr, Value)
template <>
-struct OperandTraits<ExtractValueConstantExpr> : public FixedNumOperandTraits<1> {
+struct OperandTraits<ExtractValueConstantExpr> :
+ public FixedNumOperandTraits<ExtractValueConstantExpr, 1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractValueConstantExpr, Value)
template <>
-struct OperandTraits<InsertValueConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<InsertValueConstantExpr> :
+ public FixedNumOperandTraits<InsertValueConstantExpr, 2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueConstantExpr, Value)
template <>
-struct OperandTraits<GetElementPtrConstantExpr> : public VariadicOperandTraits<1> {
+struct OperandTraits<GetElementPtrConstantExpr> :
+ public VariadicOperandTraits<GetElementPtrConstantExpr, 1> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrConstantExpr, Value)
template <>
-struct OperandTraits<CompareConstantExpr> : public FixedNumOperandTraits<2> {
+struct OperandTraits<CompareConstantExpr> :
+ public FixedNumOperandTraits<CompareConstantExpr, 2> {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value)
diff --git a/contrib/llvm/lib/VMCore/Core.cpp b/contrib/llvm/lib/VMCore/Core.cpp
index 5aad19d..35c3a2e 100644
--- a/contrib/llvm/lib/VMCore/Core.cpp
+++ b/contrib/llvm/lib/VMCore/Core.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the C bindings for libLLVMCore.a, which implements
-// the LLVM intermediate representation.
+// This file implements the common infrastructure (including the C bindings)
+// for libLLVMCore.a, which implements the LLVM intermediate representation.
//
//===----------------------------------------------------------------------===//
@@ -28,12 +28,24 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
using namespace llvm;
+void llvm::initializeCore(PassRegistry &Registry) {
+ initializeDominatorTreePass(Registry);
+ initializePrintModulePassPass(Registry);
+ initializePrintFunctionPassPass(Registry);
+ initializeVerifierPass(Registry);
+ initializePreVerifierPass(Registry);
+}
+
+void LLVMInitializeCore(LLVMPassRegistryRef R) {
+ initializeCore(*unwrap(R));
+}
/*===-- Error handling ----------------------------------------------------===*/
@@ -116,6 +128,10 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
return wrap(unwrap(M)->getTypeByName(Name));
}
+const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty) {
+ return unwrap(M)->getTypeName(unwrap(Ty)).c_str();
+}
+
void LLVMDumpModule(LLVMModuleRef M) {
unwrap(M)->dump();
}
@@ -126,6 +142,12 @@ void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
}
+/*--.. Operations on module contexts ......................................--*/
+LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
+ return wrap(&unwrap(M)->getContext());
+}
+
+
/*===-- Operations on types -----------------------------------------------===*/
/*--.. Operations on all types (mostly) ....................................--*/
@@ -164,6 +186,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMOpaqueTypeKind;
case Type::VectorTyID:
return LLVMVectorTypeKind;
+ case Type::X86_MMXTyID:
+ return LLVMX86_MMXTypeKind;
}
}
@@ -232,6 +256,9 @@ LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) {
LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C));
}
+LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
+}
LLVMTypeRef LLVMFloatType(void) {
return LLVMFloatTypeInContext(LLVMGetGlobalContext());
@@ -248,6 +275,9 @@ LLVMTypeRef LLVMFP128Type(void) {
LLVMTypeRef LLVMPPCFP128Type(void) {
return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext());
}
+LLVMTypeRef LLVMX86MMXType(void) {
+ return LLVMX86MMXTypeInContext(LLVMGetGlobalContext());
+}
/*--.. Operations on function types ........................................--*/
@@ -527,6 +557,14 @@ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), N, SignExtend != 0));
}
+LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
+ unsigned NumWords,
+ const uint64_t Words[]) {
+ IntegerType *Ty = unwrap<IntegerType>(IntTy);
+ return wrap(ConstantInt::get(Ty->getContext(),
+ APInt(Ty->getBitWidth(), NumWords, Words)));
+}
+
LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[],
uint8_t Radix) {
return wrap(ConstantInt::get(unwrap<IntegerType>(IntTy), StringRef(Str),
@@ -567,7 +605,7 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
LLVMBool DontNullTerminate) {
/* Inverted the sense of AddNull because ', 0)' is a
better mnemonic for null termination than ', 1)'. */
- return wrap(ConstantArray::get(*unwrap(C), std::string(Str, Length),
+ return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
DontNullTerminate == 0));
}
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
@@ -595,8 +633,8 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
Packed);
}
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
- return wrap(ConstantVector::get(
- unwrap<Constant>(ScalarConstantVals, Size), Size));
+ return wrap(ConstantVector::get(ArrayRef<Constant*>(
+ unwrap<Constant>(ScalarConstantVals, Size), Size)));
}
/*--.. Constant expressions ................................................--*/
@@ -613,74 +651,62 @@ LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) {
}
LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNeg(
- unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNeg(unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNSWNeg(
- unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNSWNeg(unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNUWNeg(
- unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNUWNeg(unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getFNeg(
- unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getFNeg(unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) {
- return wrap(ConstantExpr::getNot(
- unwrap<Constant>(ConstantVal)));
+ return wrap(ConstantExpr::getNot(unwrap<Constant>(ConstantVal)));
}
LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAdd(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAdd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWAdd(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNSWAdd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWAdd(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNUWAdd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFAdd(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFAdd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSub(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWSub(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNSWSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWSub(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNUWSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
@@ -690,89 +716,75 @@ LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
}
LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getMul(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getMul(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNSWMul(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNSWMul(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getNUWMul(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getNUWMul(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFMul(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFMul(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getUDiv(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getUDiv(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSDiv(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant,
LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getExactSDiv(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getExactSDiv(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFDiv(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFDiv(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getURem(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getURem(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getSRem(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getSRem(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getFRem(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getFRem(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAnd(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAnd(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getOr(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getOr(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getXor(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getXor(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
@@ -791,27 +803,23 @@ LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate,
}
LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getShl(
- unwrap<Constant>(LHSConstant),
- unwrap<Constant>(RHSConstant)));
+ return wrap(ConstantExpr::getShl(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getLShr(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getLShr(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
- return wrap(ConstantExpr::getAShr(
- unwrap<Constant>(LHSConstant),
+ return wrap(ConstantExpr::getAShr(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
}
LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
LLVMValueRef *ConstantIndices, unsigned NumIndices) {
- return wrap(ConstantExpr::getGetElementPtr(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal),
unwrap<Constant>(ConstantIndices,
NumIndices),
NumIndices));
@@ -826,38 +834,32 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
}
LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getTrunc(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getTrunc(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExt(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getSExt(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExt(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getZExt(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPTrunc(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPTrunc(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPExtend(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPExtend(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getUIToFP(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getUIToFP(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
@@ -872,92 +874,78 @@ LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
}
LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPToSI(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPToSI(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getPtrToInt(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getPtrToInt(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getIntToPtr(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getIntToPtr(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getBitCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getBitCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getZExtOrBitCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getZExtOrBitCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getSExtOrBitCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getSExtOrBitCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getTruncOrBitCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getTruncOrBitCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getPointerCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getPointerCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
LLVMBool isSigned) {
- return wrap(ConstantExpr::getIntegerCast(
- unwrap<Constant>(ConstantVal),
- unwrap(ToType),
- isSigned));
+ return wrap(ConstantExpr::getIntegerCast(unwrap<Constant>(ConstantVal),
+ unwrap(ToType), isSigned));
}
LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
- return wrap(ConstantExpr::getFPCast(
- unwrap<Constant>(ConstantVal),
+ return wrap(ConstantExpr::getFPCast(unwrap<Constant>(ConstantVal),
unwrap(ToType)));
}
LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
LLVMValueRef ConstantIfTrue,
LLVMValueRef ConstantIfFalse) {
- return wrap(ConstantExpr::getSelect(
- unwrap<Constant>(ConstantCondition),
+ return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
unwrap<Constant>(ConstantIfTrue),
unwrap<Constant>(ConstantIfFalse)));
}
LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getExtractElement(
- unwrap<Constant>(VectorConstant),
+ return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
unwrap<Constant>(IndexConstant)));
}
LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef ElementValueConstant,
LLVMValueRef IndexConstant) {
- return wrap(ConstantExpr::getInsertElement(
- unwrap<Constant>(VectorConstant),
+ return wrap(ConstantExpr::getInsertElement(unwrap<Constant>(VectorConstant),
unwrap<Constant>(ElementValueConstant),
unwrap<Constant>(IndexConstant)));
}
@@ -965,24 +953,21 @@ LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
LLVMValueRef VectorBConstant,
LLVMValueRef MaskConstant) {
- return wrap(ConstantExpr::getShuffleVector(
- unwrap<Constant>(VectorAConstant),
+ return wrap(ConstantExpr::getShuffleVector(unwrap<Constant>(VectorAConstant),
unwrap<Constant>(VectorBConstant),
unwrap<Constant>(MaskConstant)));
}
LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
unsigned NumIdx) {
- return wrap(ConstantExpr::getExtractValue(
- unwrap<Constant>(AggConstant),
+ return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
IdxList, NumIdx));
}
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
LLVMValueRef ElementValueConstant,
unsigned *IdxList, unsigned NumIdx) {
- return wrap(ConstantExpr::getInsertValue(
- unwrap<Constant>(AggConstant),
+ return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
unwrap<Constant>(ElementValueConstant),
IdxList, NumIdx));
}
@@ -2186,25 +2171,27 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- std::string Error;
- if (MemoryBuffer *MB = MemoryBuffer::getFile(Path, &Error)) {
- *OutMemBuf = wrap(MB);
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getFile(Path, MB))) {
+ *OutMemBuf = wrap(MB.take());
return 0;
}
-
- *OutMessage = strdup(Error.c_str());
+
+ *OutMessage = strdup(ec.message().c_str());
return 1;
}
LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- std::string Error;
- if (MemoryBuffer *MB = MemoryBuffer::getSTDIN(&Error)) {
- *OutMemBuf = wrap(MB);
+ OwningPtr<MemoryBuffer> MB;
+ error_code ec;
+ if (!(ec = MemoryBuffer::getSTDIN(MB))) {
+ *OutMemBuf = wrap(MB.take());
return 0;
}
- *OutMessage = strdup(Error.c_str());
+ *OutMessage = strdup(ec.message().c_str());
return 1;
}
@@ -2212,6 +2199,11 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
delete unwrap(MemBuf);
}
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
+ return wrap(PassRegistry::getPassRegistry());
+}
/*===-- Pass Manager ------------------------------------------------------===*/
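
A minimal usage sketch of the C-binding entry points added to this file. The driver function, the value names and the 128-bit example constant are assumptions for illustration; only the LLVM* calls themselves come from the patch (declared in llvm-c/Core.h and llvm-c/Initialization.h at this revision):

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"
    #include <stdint.h>

    static void demoNewCoreBindings(void) {
      /* Register the core passes with the global registry via the C API. */
      LLVMPassRegistryRef Registry = LLVMGetGlobalPassRegistry();
      LLVMInitializeCore(Registry);

      /* The X86 MMX type is now exposed alongside the other primitive types. */
      LLVMTypeRef MMXTy = LLVMX86MMXType();
      (void)MMXTy;

      /* Arbitrary-precision integer constants: a 128-bit value from two words. */
      LLVMContextRef Ctx = LLVMGetGlobalContext();
      LLVMTypeRef I128 = LLVMIntTypeInContext(Ctx, 128);
      uint64_t Words[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
      LLVMValueRef C128 = LLVMConstIntOfArbitraryPrecision(I128, 2, Words);
      (void)C128;
    }
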
diff --git a/contrib/llvm/lib/VMCore/Dominators.cpp b/contrib/llvm/lib/VMCore/Dominators.cpp
index f3dad82..c374b06 100644
--- a/contrib/llvm/lib/VMCore/Dominators.cpp
+++ b/contrib/llvm/lib/VMCore/Dominators.cpp
@@ -19,10 +19,10 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/Instructions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/CommandLine.h"
@@ -44,7 +44,7 @@ VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
//===----------------------------------------------------------------------===//
//
// Provide public access to DominatorTree information. Implementation details
-// can be found in DominatorCalculation.h.
+// can be found in DominatorInternals.h.
//
//===----------------------------------------------------------------------===//
@@ -53,7 +53,7 @@ TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
char DominatorTree::ID = 0;
INITIALIZE_PASS(DominatorTree, "domtree",
- "Dominator Tree Construction", true, true);
+ "Dominator Tree Construction", true, true)
bool DominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -67,7 +67,14 @@ void DominatorTree::verifyAnalysis() const {
DominatorTree OtherDT;
OtherDT.getBase().recalculate(F);
- assert(!compare(OtherDT) && "Invalid DominatorTree info!");
+ if (compare(OtherDT)) {
+ errs() << "DominatorTree is not up to date! Computed:\n";
+ print(errs());
+
+ errs() << "\nActual:\n";
+ OtherDT.print(errs());
+ abort();
+ }
}
void DominatorTree::print(raw_ostream &OS, const Module *) const {
@@ -98,263 +105,3 @@ bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
return &*I == A;
}
-
-
-
-//===----------------------------------------------------------------------===//
-// DominanceFrontier Implementation
-//===----------------------------------------------------------------------===//
-
-char DominanceFrontier::ID = 0;
-INITIALIZE_PASS(DominanceFrontier, "domfrontier",
- "Dominance Frontier Construction", true, true);
-
-void DominanceFrontier::verifyAnalysis() const {
- if (!VerifyDomInfo) return;
-
- DominatorTree &DT = getAnalysis<DominatorTree>();
-
- DominanceFrontier OtherDF;
- const std::vector<BasicBlock*> &DTRoots = DT.getRoots();
- OtherDF.calculate(DT, DT.getNode(DTRoots[0]));
- assert(!compare(OtherDF) && "Invalid DominanceFrontier info!");
-}
-
-// NewBB is split and now it has one successor. Update dominance frontier to
-// reflect this change.
-void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
- assert(NewBB->getTerminator()->getNumSuccessors() == 1 &&
- "NewBB should have a single successor!");
- BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
-
- // NewBBSucc inherits original NewBB frontier.
- DominanceFrontier::iterator NewBBI = find(NewBB);
- if (NewBBI != end())
- addBasicBlock(NewBBSucc, NewBBI->second);
-
- // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the
- // DF(NewBBSucc) without the stuff that the new block does not dominate
- // a predecessor of.
- DominatorTree &DT = getAnalysis<DominatorTree>();
- DomTreeNode *NewBBNode = DT.getNode(NewBB);
- DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc);
- if (DT.dominates(NewBBNode, NewBBSuccNode)) {
- DominanceFrontier::iterator DFI = find(NewBBSucc);
- if (DFI != end()) {
- DominanceFrontier::DomSetType Set = DFI->second;
- // Filter out stuff in Set that we do not dominate a predecessor of.
- for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
- E = Set.end(); SetI != E;) {
- bool DominatesPred = false;
- for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
- PI != E; ++PI)
- if (DT.dominates(NewBBNode, DT.getNode(*PI))) {
- DominatesPred = true;
- break;
- }
- if (!DominatesPred)
- Set.erase(SetI++);
- else
- ++SetI;
- }
-
- if (NewBBI != end()) {
- for (DominanceFrontier::DomSetType::iterator SetI = Set.begin(),
- E = Set.end(); SetI != E; ++SetI) {
- BasicBlock *SB = *SetI;
- addToFrontier(NewBBI, SB);
- }
- } else
- addBasicBlock(NewBB, Set);
- }
-
- } else {
- // DF(NewBB) is {NewBBSucc} because NewBB does not strictly dominate
- // NewBBSucc, but it does dominate itself (and there is an edge (NewBB ->
- // NewBBSucc)). NewBBSucc is the single successor of NewBB.
- DominanceFrontier::DomSetType NewDFSet;
- NewDFSet.insert(NewBBSucc);
- addBasicBlock(NewBB, NewDFSet);
- }
-
- // Now update dominance frontiers which either used to contain NewBBSucc
- // or which now need to include NewBB.
-
- // Collect the set of blocks which dominate a predecessor of NewBB or
- // NewSuccBB and which don't dominate both. This is an initial
- // approximation of the blocks whose dominance frontiers will need updates.
- SmallVector<DomTreeNode *, 16> AllPredDoms;
-
- // Compute the block which dominates both NewBBSucc and NewBB. This is
- // the immediate dominator of NewBBSucc unless NewBB dominates NewBBSucc.
- // The code below which climbs dominator trees will stop at this point,
- // because from this point up, dominance frontiers are unaffected.
- DomTreeNode *DominatesBoth = 0;
- if (NewBBSuccNode) {
- DominatesBoth = NewBBSuccNode->getIDom();
- if (DominatesBoth == NewBBNode)
- DominatesBoth = NewBBNode->getIDom();
- }
-
- // Collect the set of all blocks which dominate a predecessor of NewBB.
- SmallPtrSet<DomTreeNode *, 8> NewBBPredDoms;
- for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI)
- for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
- if (DTN == DominatesBoth)
- break;
- if (!NewBBPredDoms.insert(DTN))
- break;
- AllPredDoms.push_back(DTN);
- }
-
- // Collect the set of all blocks which dominate a predecessor of NewSuccBB.
- SmallPtrSet<DomTreeNode *, 8> NewBBSuccPredDoms;
- for (pred_iterator PI = pred_begin(NewBBSucc),
- E = pred_end(NewBBSucc); PI != E; ++PI)
- for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
- if (DTN == DominatesBoth)
- break;
- if (!NewBBSuccPredDoms.insert(DTN))
- break;
- if (!NewBBPredDoms.count(DTN))
- AllPredDoms.push_back(DTN);
- }
-
- // Visit all relevant dominance frontiers and make any needed updates.
- for (SmallVectorImpl<DomTreeNode *>::const_iterator I = AllPredDoms.begin(),
- E = AllPredDoms.end(); I != E; ++I) {
- DomTreeNode *DTN = *I;
- iterator DFI = find((*I)->getBlock());
-
- // Only consider nodes that have NewBBSucc in their dominator frontier.
- if (DFI == end() || !DFI->second.count(NewBBSucc)) continue;
-
- // If the block dominates a predecessor of NewBB but does not properly
- // dominate NewBB itself, add NewBB to its dominance frontier.
- if (NewBBPredDoms.count(DTN) &&
- !DT.properlyDominates(DTN, NewBBNode))
- addToFrontier(DFI, NewBB);
-
- // If the block does not dominate a predecessor of NewBBSucc or
- // properly dominates NewBBSucc itself, remove NewBBSucc from its
- // dominance frontier.
- if (!NewBBSuccPredDoms.count(DTN) ||
- DT.properlyDominates(DTN, NewBBSuccNode))
- removeFromFrontier(DFI, NewBBSucc);
- }
-}
-
-namespace {
- class DFCalculateWorkObject {
- public:
- DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
- const DomTreeNode *N,
- const DomTreeNode *PN)
- : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
- BasicBlock *currentBB;
- BasicBlock *parentBB;
- const DomTreeNode *Node;
- const DomTreeNode *parentNode;
- };
-}
-
-const DominanceFrontier::DomSetType &
-DominanceFrontier::calculate(const DominatorTree &DT,
- const DomTreeNode *Node) {
- BasicBlock *BB = Node->getBlock();
- DomSetType *Result = NULL;
-
- std::vector<DFCalculateWorkObject> workList;
- SmallPtrSet<BasicBlock *, 32> visited;
-
- workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
- do {
- DFCalculateWorkObject *currentW = &workList.back();
- assert (currentW && "Missing work object.");
-
- BasicBlock *currentBB = currentW->currentBB;
- BasicBlock *parentBB = currentW->parentBB;
- const DomTreeNode *currentNode = currentW->Node;
- const DomTreeNode *parentNode = currentW->parentNode;
- assert (currentBB && "Invalid work object. Missing current Basic Block");
- assert (currentNode && "Invalid work object. Missing current Node");
- DomSetType &S = Frontiers[currentBB];
-
- // Visit each block only once.
- if (visited.count(currentBB) == 0) {
- visited.insert(currentBB);
-
- // Loop over CFG successors to calculate DFlocal[currentNode]
- for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
- SI != SE; ++SI) {
- // Does Node immediately dominate this successor?
- if (DT[*SI]->getIDom() != currentNode)
- S.insert(*SI);
- }
- }
-
- // At this point, S is DFlocal. Now we union in DFup's of our children...
- // Loop through and visit the nodes that Node immediately dominates (Node's
- // children in the IDomTree)
- bool visitChild = false;
- for (DomTreeNode::const_iterator NI = currentNode->begin(),
- NE = currentNode->end(); NI != NE; ++NI) {
- DomTreeNode *IDominee = *NI;
- BasicBlock *childBB = IDominee->getBlock();
- if (visited.count(childBB) == 0) {
- workList.push_back(DFCalculateWorkObject(childBB, currentBB,
- IDominee, currentNode));
- visitChild = true;
- }
- }
-
- // If all children are visited or there is any child then pop this block
- // from the workList.
- if (!visitChild) {
-
- if (!parentBB) {
- Result = &S;
- break;
- }
-
- DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
- DomSetType &parentSet = Frontiers[parentBB];
- for (; CDFI != CDFE; ++CDFI) {
- if (!DT.properlyDominates(parentNode, DT[*CDFI]))
- parentSet.insert(*CDFI);
- }
- workList.pop_back();
- }
-
- } while (!workList.empty());
-
- return *Result;
-}
-
-void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- OS << " DomFrontier for BB ";
- if (I->first)
- WriteAsOperand(OS, I->first, false);
- else
- OS << " <<exit node>>";
- OS << " is:\t";
-
- const std::set<BasicBlock*> &BBs = I->second;
-
- for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
- I != E; ++I) {
- OS << ' ';
- if (*I)
- WriteAsOperand(OS, *I, false);
- else
- OS << "<<exit node>>";
- }
- OS << "\n";
- }
-}
-
-void DominanceFrontierBase::dump() const {
- print(dbgs());
-}
-
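
A hand-rolled equivalent of the verification performed above, shown only as a sketch: recompute dominator information for a function and report a mismatch instead of asserting. The helper name and the way DT is obtained are assumptions; inside a pass it would come from getAnalysis<DominatorTree>():

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Function.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void checkDomInfo(Function &F, DominatorTree &DT) {
      DominatorTree Fresh;
      Fresh.getBase().recalculate(F);
      if (DT.compare(Fresh)) {          // compare() returns true on mismatch
        errs() << "DominatorTree out of date for " << F.getName() << "\n";
        DT.print(errs());
        Fresh.print(errs());
      }
    }
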
diff --git a/contrib/llvm/lib/VMCore/Function.cpp b/contrib/llvm/lib/VMCore/Function.cpp
index 8f94efc..00d1d78 100644
--- a/contrib/llvm/lib/VMCore/Function.cpp
+++ b/contrib/llvm/lib/VMCore/Function.cpp
@@ -20,8 +20,8 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/StringPool.h"
-#include "llvm/System/RWMutex.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/RWMutex.h"
+#include "llvm/Support/Threading.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
@@ -227,19 +227,10 @@ void Function::dropAllReferences() {
for (iterator I = begin(), E = end(); I != E; ++I)
I->dropAllReferences();
- // Delete all basic blocks.
- while (!BasicBlocks.empty()) {
- // If there is still a reference to the block, it must be a 'blockaddress'
- // constant pointing to it. Just replace the BlockAddress with undef.
- BasicBlock *BB = BasicBlocks.begin();
- if (!BB->use_empty()) {
- BlockAddress *BA = cast<BlockAddress>(BB->use_back());
- BA->replaceAllUsesWith(UndefValue::get(BA->getType()));
- BA->destroyConstant();
- }
-
- BB->eraseFromParent();
- }
+ // Delete all basic blocks. They are now unused, except possibly by
+ // blockaddresses, but BasicBlock's destructor takes care of those.
+ while (!BasicBlocks.empty())
+ BasicBlocks.begin()->eraseFromParent();
}
void Function::addAttribute(unsigned i, Attributes attr) {
diff --git a/contrib/llvm/lib/VMCore/Globals.cpp b/contrib/llvm/lib/VMCore/Globals.cpp
index 96716ee..60000ad 100644
--- a/contrib/llvm/lib/VMCore/Globals.cpp
+++ b/contrib/llvm/lib/VMCore/Globals.cpp
@@ -26,23 +26,6 @@ using namespace llvm;
// GlobalValue Class
//===----------------------------------------------------------------------===//
-/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
-/// it. This involves recursively eliminating any dead users of the
-/// constantexpr.
-static bool removeDeadUsersOfConstant(const Constant *C) {
- if (isa<GlobalValue>(C)) return false; // Cannot remove this
-
- while (!C->use_empty()) {
- const Constant *User = dyn_cast<Constant>(C->use_back());
- if (!User) return false; // Non-constant usage;
- if (!removeDeadUsersOfConstant(User))
- return false; // Constant wasn't dead
- }
-
- const_cast<Constant*>(C)->destroyConstant();
- return true;
-}
-
bool GlobalValue::isMaterializable() const {
return getParent() && getParent()->isMaterializable(this);
}
@@ -56,38 +39,6 @@ void GlobalValue::Dematerialize() {
getParent()->Dematerialize(this);
}
-/// removeDeadConstantUsers - If there are any dead constant users dangling
-/// off of this global value, remove them. This method is useful for clients
-/// that want to check to see if a global is unused, but don't want to deal
-/// with potentially dead constants hanging off of the globals.
-void GlobalValue::removeDeadConstantUsers() const {
- Value::const_use_iterator I = use_begin(), E = use_end();
- Value::const_use_iterator LastNonDeadUser = E;
- while (I != E) {
- if (const Constant *User = dyn_cast<Constant>(*I)) {
- if (!removeDeadUsersOfConstant(User)) {
- // If the constant wasn't dead, remember that this was the last live use
- // and move on to the next constant.
- LastNonDeadUser = I;
- ++I;
- } else {
- // If the constant was dead, then the iterator is invalidated.
- if (LastNonDeadUser == E) {
- I = use_begin();
- if (I == E) break;
- } else {
- I = LastNonDeadUser;
- ++I;
- }
- }
- } else {
- LastNonDeadUser = I;
- ++I;
- }
- }
-}
-
-
/// Override destroyConstant to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
void GlobalValue::destroyConstant() {
diff --git a/contrib/llvm/lib/VMCore/IRBuilder.cpp b/contrib/llvm/lib/VMCore/IRBuilder.cpp
index c1b783c..595dea4 100644
--- a/contrib/llvm/lib/VMCore/IRBuilder.cpp
+++ b/contrib/llvm/lib/VMCore/IRBuilder.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
using namespace llvm;
@@ -36,3 +37,83 @@ const Type *IRBuilderBase::getCurrentFunctionReturnType() const {
assert(BB && BB->getParent() && "No current function!");
return BB->getParent()->getReturnType();
}
+
+Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
+ const PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getElementType()->isIntegerTy(8))
+ return Ptr;
+
+ // Otherwise, we need to insert a bitcast.
+ PT = getInt8PtrTy(PT->getAddressSpace());
+ BitCastInst *BCI = new BitCastInst(Ptr, PT, "");
+ BB->getInstList().insert(InsertPt, BCI);
+ SetInstDebugLocation(BCI);
+ return BCI;
+}
+
+static CallInst *createCallHelper(Value *Callee, Value *const* Ops,
+ unsigned NumOps, IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, Ops + NumOps, "");
+ Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
+ Builder->SetInstDebugLocation(CI);
+ return CI;
+}
+
+
+CallInst *IRBuilderBase::
+CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Ptr = getCastedInt8PtrValue(Ptr);
+ Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
+ const Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
+
+CallInst *IRBuilderBase::
+CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+ bool isVolatile, MDNode *TBAATag) {
+ Dst = getCastedInt8PtrValue(Dst);
+ Src = getCastedInt8PtrValue(Src);
+
+ Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
+ const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys, 3);
+
+ CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+
+ // Set the TBAA info if present.
+ if (TBAATag)
+ CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+ return CI;
+}
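
The three helpers defined above let IRBuilder emit the memory intrinsics directly and attach TBAA metadata to the resulting call. A minimal sketch of a caller, assuming Dst, Src and Size are already available as Values (the alignment values and the helper name are illustrative):

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    static void emitCopyAndClear(IRBuilder<> &B, Value *Dst, Value *Src,
                                 Value *Size) {
      // Pointers of any element type are accepted; the helpers bitcast to i8*.
      B.CreateMemCpy(Dst, Src, Size, /*Align=*/1, /*isVolatile=*/false);
      // Zero-fill the source afterwards, 4-byte aligned, no TBAA tag.
      B.CreateMemSet(Src, B.getInt8(0), Size, /*Align=*/4);
    }
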
diff --git a/contrib/llvm/lib/VMCore/InlineAsm.cpp b/contrib/llvm/lib/VMCore/InlineAsm.cpp
index 69f713b..e4f99f0 100644
--- a/contrib/llvm/lib/VMCore/InlineAsm.cpp
+++ b/contrib/llvm/lib/VMCore/InlineAsm.cpp
@@ -47,26 +47,54 @@ InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
}
void InlineAsm::destroyConstant() {
+ getRawType()->getContext().pImpl->InlineAsms.remove(this);
delete this;
}
const FunctionType *InlineAsm::getFunctionType() const {
return cast<FunctionType>(getType()->getElementType());
}
+
+/// Default constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo() :
+ Type(isInput), isEarlyClobber(false),
+ MatchingInput(-1), isCommutative(false),
+ isIndirect(false), isMultipleAlternative(false),
+ currentAlternativeIndex(0) {
+}
+
+/// Copy constructor.
+InlineAsm::ConstraintInfo::ConstraintInfo(const ConstraintInfo &other) :
+ Type(other.Type), isEarlyClobber(other.isEarlyClobber),
+ MatchingInput(other.MatchingInput), isCommutative(other.isCommutative),
+ isIndirect(other.isIndirect), Codes(other.Codes),
+ isMultipleAlternative(other.isMultipleAlternative),
+ multipleAlternatives(other.multipleAlternatives),
+ currentAlternativeIndex(other.currentAlternativeIndex) {
+}
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
- std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
+ InlineAsm::ConstraintInfoVector &ConstraintsSoFar) {
StringRef::iterator I = Str.begin(), E = Str.end();
+ unsigned multipleAlternativeCount = Str.count('|') + 1;
+ unsigned multipleAlternativeIndex = 0;
+ ConstraintCodeVector *pCodes = &Codes;
// Initialize
+ isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
+ if (isMultipleAlternative) {
+ multipleAlternatives.resize(multipleAlternativeCount);
+ pCodes = &multipleAlternatives[0].Codes;
+ }
Type = isInput;
isEarlyClobber = false;
MatchingInput = -1;
isCommutative = false;
isIndirect = false;
+ currentAlternativeIndex = 0;
// Parse prefixes.
if (*I == '~') {
@@ -120,15 +148,15 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// Find the end of the register name.
StringRef::iterator ConstraintEnd = std::find(I+1, E, '}');
if (ConstraintEnd == E) return true; // "{foo"
- Codes.push_back(std::string(I, ConstraintEnd+1));
+ pCodes->push_back(std::string(I, ConstraintEnd+1));
I = ConstraintEnd+1;
} else if (isdigit(*I)) { // Matching Constraint
// Maximal munch numbers.
StringRef::iterator NumStart = I;
while (I != E && isdigit(*I))
++I;
- Codes.push_back(std::string(NumStart, I));
- unsigned N = atoi(Codes.back().c_str());
+ pCodes->push_back(std::string(NumStart, I));
+ unsigned N = atoi(pCodes->back().c_str());
// Check that this is a valid matching constraint!
if (N >= ConstraintsSoFar.size() || ConstraintsSoFar[N].Type != isOutput||
Type != isInput)
@@ -136,14 +164,26 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
// If Operand N already has a matching input, reject this. An output
// can't be constrained to the same value as multiple inputs.
- if (ConstraintsSoFar[N].hasMatchingInput())
- return true;
-
- // Note that operand #n has a matching input.
- ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ if (isMultipleAlternative) {
+ InlineAsm::SubConstraintInfo &scInfo =
+ ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex];
+ if (scInfo.MatchingInput != -1)
+ return true;
+ // Note that operand #n has a matching input.
+ scInfo.MatchingInput = ConstraintsSoFar.size();
+ } else {
+ if (ConstraintsSoFar[N].hasMatchingInput())
+ return true;
+ // Note that operand #n has a matching input.
+ ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size();
+ }
+ } else if (*I == '|') {
+ multipleAlternativeIndex++;
+ pCodes = &multipleAlternatives[multipleAlternativeIndex].Codes;
+ ++I;
} else {
// Single letter constraint.
- Codes.push_back(std::string(I, I+1));
+ pCodes->push_back(std::string(I, I+1));
++I;
}
}
@@ -151,9 +191,21 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
return false;
}
-std::vector<InlineAsm::ConstraintInfo>
+/// selectAlternative - Point this constraint to the alternative constraint
+/// indicated by the index.
+void InlineAsm::ConstraintInfo::selectAlternative(unsigned index) {
+ if (index < multipleAlternatives.size()) {
+ currentAlternativeIndex = index;
+ InlineAsm::SubConstraintInfo &scInfo =
+ multipleAlternatives[currentAlternativeIndex];
+ MatchingInput = scInfo.MatchingInput;
+ Codes = scInfo.Codes;
+ }
+}
+
+InlineAsm::ConstraintInfoVector
InlineAsm::ParseConstraints(StringRef Constraints) {
- std::vector<ConstraintInfo> Result;
+ ConstraintInfoVector Result;
// Scan the constraints string.
for (StringRef::iterator I = Constraints.begin(),
@@ -183,13 +235,12 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
return Result;
}
-
/// Verify - Verify that the specified constraint string is reasonable for the
/// specified function type, and otherwise validate the constraint string.
bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
- std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
+ ConstraintInfoVector Constraints = ParseConstraints(ConstStr);
// Error parsing constraints.
if (Constraints.empty() && !ConstStr.empty()) return false;
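
A sketch of how the multiple-alternative support added above surfaces to clients; the constraint string is only an example of the new 'a|b' syntax, and the loop is an assumption about how a backend might commit to one alternative:

    #include "llvm/InlineAsm.h"
    using namespace llvm;

    static void pickFirstAlternatives() {
      // "=r|m,r|m": one output and one input, each allowing register or memory.
      InlineAsm::ConstraintInfoVector CV =
          InlineAsm::ParseConstraints("=r|m,r|m");
      for (unsigned i = 0, e = CV.size(); i != e; ++i)
        if (CV[i].isMultipleAlternative)
          CV[i].selectAlternative(0);   // commit to the first alternative, "r"
    }
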
diff --git a/contrib/llvm/lib/VMCore/Instruction.cpp b/contrib/llvm/lib/VMCore/Instruction.cpp
index 05bed4c..2c8b8b2 100644
--- a/contrib/llvm/lib/VMCore/Instruction.cpp
+++ b/contrib/llvm/lib/VMCore/Instruction.cpp
@@ -200,12 +200,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (const CallInst *CI = dyn_cast<CallInst>(this))
return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<CallInst>(I)->getAttributes().getRawPointer();
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<InvokeInst>(I)->getAttributes().getRawPointer();
+ CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
return false;
@@ -253,12 +251,11 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
if (const CallInst *CI = dyn_cast<CallInst>(this))
return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<CallInst>(I)->getAttributes().getRawPointer();
+ CI->getAttributes() == cast<CallInst>(I)->getAttributes();
if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes().getRawPointer() ==
- cast<InvokeInst>(I)->getAttributes().getRawPointer();
+ CI->getAttributes() ==
+ cast<InvokeInst>(I)->getAttributes();
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
return false;
@@ -348,7 +345,7 @@ bool Instruction::mayThrow() const {
///
/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
///
-bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) {
+bool Instruction::isAssociative(unsigned Opcode) {
return Opcode == And || Opcode == Or || Opcode == Xor ||
Opcode == Add || Opcode == Mul;
}
@@ -398,25 +395,10 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
return Op && !Op->isNullValue() && !Op->isAllOnesValue();
}
case Load: {
- if (cast<LoadInst>(this)->isVolatile())
+ const LoadInst *LI = cast<LoadInst>(this);
+ if (LI->isVolatile())
return false;
- // Note that it is not safe to speculate into a malloc'd region because
- // malloc may return null.
- // It's also not safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca.
- Value *Op0 = getOperand(0);
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) {
- // TODO: it's safe to do this for any GEP with constant indices that
- // compute inside the allocated type, but not for any inbounds gep.
- if (GEP->hasAllZeroIndices())
- Op0 = GEP->getPointerOperand();
- }
- if (isa<AllocaInst>(Op0))
- return true;
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
- return !GV->hasExternalWeakLinkage();
- return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
}
case Call:
return false; // The called function could have undefined behavior or
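
With the change above, deciding whether a non-volatile load may be speculated reduces to asking whether its pointer operand is known dereferenceable (an alloca, most globals, and so on). A purely illustrative caller; real hoisting code would also check dominance and aliasing:

    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool mayHoistLoad(const LoadInst *LI) {
      return !LI->isVolatile() && LI->isSafeToSpeculativelyExecute();
    }
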
diff --git a/contrib/llvm/lib/VMCore/Instructions.cpp b/contrib/llvm/lib/VMCore/Instructions.cpp
index 401802e..d129028 100644
--- a/contrib/llvm/lib/VMCore/Instructions.cpp
+++ b/contrib/llvm/lib/VMCore/Instructions.cpp
@@ -19,7 +19,6 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
@@ -97,8 +96,7 @@ PHINode::PHINode(const PHINode &PN)
}
PHINode::~PHINode() {
- if (OperandList)
- dropHungoffUses(OperandList);
+ dropHungoffUses();
}
// removeIncomingValue - Remove an incoming value. This is useful if a
@@ -159,66 +157,18 @@ void PHINode::resizeOperands(unsigned NumOps) {
Use *NewOps = allocHungoffUses(NumOps);
std::copy(OldOps, OldOps + e, NewOps);
OperandList = NewOps;
- if (OldOps) Use::zap(OldOps, OldOps + e, true);
+ Use::zap(OldOps, OldOps + e, true);
}
/// hasConstantValue - If the specified PHI node always merges together the same
/// value, return the value, otherwise return null.
-///
-/// If the PHI has undef operands, but all the rest of the operands are
-/// some unique value, return that value if it can be proved that the
-/// value dominates the PHI. If DT is null, use a conservative check,
-/// otherwise use DT to test for dominance.
-///
-Value *PHINode::hasConstantValue(DominatorTree *DT) const {
- // If the PHI node only has one incoming value, eliminate the PHI node.
- if (getNumIncomingValues() == 1) {
- if (getIncomingValue(0) != this) // not X = phi X
- return getIncomingValue(0);
- return UndefValue::get(getType()); // Self cycle is dead.
- }
-
- // Otherwise if all of the incoming values are the same for the PHI, replace
- // the PHI node with the incoming value.
- //
- Value *InVal = 0;
- bool HasUndefInput = false;
- for (unsigned i = 0, e = getNumIncomingValues(); i != e; ++i)
- if (isa<UndefValue>(getIncomingValue(i))) {
- HasUndefInput = true;
- } else if (getIncomingValue(i) != this) { // Not the PHI node itself...
- if (InVal && getIncomingValue(i) != InVal)
- return 0; // Not the same, bail out.
- InVal = getIncomingValue(i);
- }
-
- // The only case that could cause InVal to be null is if we have a PHI node
- // that only has entries for itself. In this case, there is no entry into the
- // loop, so kill the PHI.
- //
- if (InVal == 0) InVal = UndefValue::get(getType());
-
- // If we have a PHI node like phi(X, undef, X), where X is defined by some
- // instruction, we cannot always return X as the result of the PHI node. Only
- // do this if X is not an instruction (thus it must dominate the PHI block),
- // or if the client is prepared to deal with this possibility.
- if (!HasUndefInput || !isa<Instruction>(InVal))
- return InVal;
-
- Instruction *IV = cast<Instruction>(InVal);
- if (DT) {
- // We have a DominatorTree. Do a precise test.
- if (!DT->dominates(IV, this))
- return 0;
- } else {
- // If it is in the entry block, it obviously dominates everything.
- if (IV->getParent() != &IV->getParent()->getParent()->getEntryBlock() ||
- isa<InvokeInst>(IV))
- return 0; // Cannot guarantee that InVal dominates this PHINode.
- }
-
- // All of the incoming values are the same, return the value now.
- return InVal;
+Value *PHINode::hasConstantValue() const {
+ // Exploit the fact that phi nodes always have at least one entry.
+ Value *ConstantValue = getIncomingValue(0);
+ for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
+ if (getIncomingValue(i) != ConstantValue)
+ return 0; // Incoming values not all the same.
+ return ConstantValue;
}
@@ -235,7 +185,7 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- FTy = FTy; // silence warning.
+ (void)FTy; // silence warning.
assert((NumParams == FTy->getNumParams() ||
(FTy->isVarArg() && NumParams > FTy->getNumParams())) &&
@@ -256,7 +206,7 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- FTy = FTy; // silence warning.
+ (void)FTy; // silence warning.
assert((FTy->getNumParams() == 2 ||
(FTy->isVarArg() && FTy->getNumParams() < 2)) &&
@@ -276,7 +226,7 @@ void CallInst::init(Value *Func, Value *Actual) {
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- FTy = FTy; // silence warning.
+ (void)FTy; // silence warning.
assert((FTy->getNumParams() == 1 ||
(FTy->isVarArg() && FTy->getNumParams() == 0)) &&
@@ -292,7 +242,7 @@ void CallInst::init(Value *Func) {
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- FTy = FTy; // silence warning.
+ (void)FTy; // silence warning.
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
}
@@ -549,7 +499,7 @@ void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
Op<-1>() = IfException;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
- FTy = FTy; // silence warning.
+ (void)FTy; // silence warning.
assert(((NumArgs == FTy->getNumParams()) ||
(FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
@@ -779,31 +729,6 @@ BranchInst::BranchInst(const BranchInst &BI) :
SubclassOptionalData = BI.SubclassOptionalData;
}
-
-Use* Use::getPrefix() {
- PointerIntPair<Use**, 2, PrevPtrTag> &PotentialPrefix(this[-1].Prev);
- if (PotentialPrefix.getOpaqueValue())
- return 0;
-
- return reinterpret_cast<Use*>((char*)&PotentialPrefix + 1);
-}
-
-BranchInst::~BranchInst() {
- if (NumOperands == 1) {
- if (Use *Prefix = OperandList->getPrefix()) {
- Op<-1>() = 0;
- //
- // mark OperandList to have a special value for scrutiny
- // by baseclass destructors and operator delete
- OperandList = Prefix;
- } else {
- NumOperands = 3;
- OperandList = op_begin();
- }
- }
-}
-
-
BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}
@@ -899,7 +824,7 @@ void AllocaInst::setAlignment(unsigned Align) {
bool AllocaInst::isArrayAllocation() const {
if (ConstantInt *CI = dyn_cast<ConstantInt>(getOperand(0)))
- return CI->getZExtValue() != 1;
+ return !CI->isOne();
return true;
}
@@ -1248,6 +1173,12 @@ const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
}
const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
+ return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
+}
+
+const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
uint64_t const *Idxs,
unsigned NumIdx) {
return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
@@ -1473,6 +1404,8 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const {
void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
unsigned NumIdx, const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx, Idx + NumIdx) ==
+ Val->getType() && "Inserted value must match indexed type!");
Op<0>() = Agg;
Op<1>() = Val;
@@ -1483,6 +1416,8 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx,
const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx) == Val->getType()
+ && "Inserted value must match indexed type!");
Op<0>() = Agg;
Op<1>() = Val;
@@ -1555,13 +1490,26 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
const Type* ExtractValueInst::getIndexedType(const Type *Agg,
const unsigned *Idxs,
unsigned NumIdx) {
- unsigned CurIdx = 0;
- for (; CurIdx != NumIdx; ++CurIdx) {
- const CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0;
+ for (unsigned CurIdx = 0; CurIdx != NumIdx; ++CurIdx) {
unsigned Index = Idxs[CurIdx];
- if (!CT->indexValid(Index)) return 0;
- Agg = CT->getTypeAtIndex(Index);
+ // We can't use CompositeType::indexValid(Index) here.
+ // indexValid() always returns true for arrays because getelementptr allows
+ // out-of-bounds indices. Since we don't allow those for extractvalue and
+ // insertvalue we need to check array indexing manually.
+ // Since the only other types we can index into are struct types it's just
+ // as easy to check those manually as well.
+ if (const ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
+ if (Index >= AT->getNumElements())
+ return 0;
+ } else if (const StructType *ST = dyn_cast<StructType>(Agg)) {
+ if (Index >= ST->getNumElements())
+ return 0;
+ } else {
+ // Not a valid type to index into.
+ return 0;
+ }
+
+ Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
// If the new type forwards to another type, then it is in the middle
// of being refined to another type (and hence, may have dropped all
@@ -1570,7 +1518,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
if (const Type *Ty = Agg->getForwardedType())
Agg = Ty;
}
- return CurIdx == NumIdx ? Agg : 0;
+ return Agg;
}
const Type* ExtractValueInst::getIndexedType(const Type *Agg,
@@ -1611,7 +1559,7 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2,
void BinaryOperator::init(BinaryOps iType) {
Value *LHS = getOperand(0), *RHS = getOperand(1);
- LHS = LHS; RHS = RHS; // Silence warnings.
+ (void)LHS; (void)RHS; // Silence warnings.
assert(LHS->getType() == RHS->getType() &&
"Binary operator operand types must match!");
#ifndef NDEBUG
@@ -1874,7 +1822,7 @@ void BinaryOperator::setHasNoSignedWrap(bool b) {
}
void BinaryOperator::setIsExact(bool b) {
- cast<SDivOperator>(this)->setIsExact(b);
+ cast<PossiblyExactOperator>(this)->setIsExact(b);
}
bool BinaryOperator::hasNoUnsignedWrap() const {
@@ -1886,7 +1834,7 @@ bool BinaryOperator::hasNoSignedWrap() const {
}
bool BinaryOperator::isExact() const {
- return cast<SDivOperator>(this)->isExact();
+ return cast<PossiblyExactOperator>(this)->isExact();
}
//===----------------------------------------------------------------------===//
@@ -2360,6 +2308,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
} else { // Casting from something else
return false;
}
+ } else if (DestTy->isX86_MMXTy()) {
+ return SrcBits == 64;
} else { // Casting to something else
return false;
}
@@ -2441,6 +2391,10 @@ CastInst::getCastOpcode(
return BitCast; // vector -> vector
} else if (DestPTy->getBitWidth() == SrcBits) {
return BitCast; // float/int -> vector
+ } else if (SrcTy->isX86_MMXTy()) {
+ assert(DestPTy->getBitWidth()==64 &&
+ "Casting X86_MMX to vector of wrong width");
+ return BitCast; // MMX to 64-bit vector
} else {
assert(!"Illegal cast to vector (wrong type or size)");
}
@@ -2452,6 +2406,14 @@ CastInst::getCastOpcode(
} else {
assert(!"Casting pointer to other than pointer or int");
}
+ } else if (DestTy->isX86_MMXTy()) {
+ if (isa<VectorType>(SrcTy)) {
+ assert(cast<VectorType>(SrcTy)->getBitWidth() == 64 &&
+ "Casting vector of wrong width to X86_MMX");
+ return BitCast; // 64-bit vector to MMX
+ } else {
+ assert(!"Illegal cast to X86_MMX");
+ }
} else {
assert(!"Casting to type that is not first-class");
}
@@ -2754,14 +2716,14 @@ void CmpInst::swapOperands() {
cast<FCmpInst>(this)->swapOperands();
}
-bool CmpInst::isCommutative() {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+bool CmpInst::isCommutative() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
return IC->isCommutative();
return cast<FCmpInst>(this)->isCommutative();
}
-bool CmpInst::isEquality() {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
+bool CmpInst::isEquality() const {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(this))
return IC->isEquality();
return cast<FCmpInst>(this)->isEquality();
}
@@ -2974,9 +2936,9 @@ bool CmpInst::isFalseWhenEqual(unsigned short predicate) {
// SwitchInst Implementation
//===----------------------------------------------------------------------===//
-void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumCases) {
- assert(Value && Default);
- ReservedSpace = 2+NumCases*2;
+void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
+ assert(Value && Default && NumReserved);
+ ReservedSpace = NumReserved;
NumOperands = 2;
OperandList = allocHungoffUses(ReservedSpace);
@@ -2992,7 +2954,7 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
0, 0, InsertBefore) {
- init(Value, Default, NumCases);
+ init(Value, Default, 2+NumCases*2);
}
/// SwitchInst ctor - Create a new switch instruction, specifying a value to
@@ -3003,14 +2965,15 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd)
: TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
0, 0, InsertAtEnd) {
- init(Value, Default, NumCases);
+ init(Value, Default, 2+NumCases*2);
}
SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(Type::getVoidTy(SI.getContext()), Instruction::Switch,
- allocHungoffUses(SI.getNumOperands()), SI.getNumOperands()) {
+ : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
+ NumOperands = SI.getNumOperands();
Use *OL = OperandList, *InOL = SI.OperandList;
- for (unsigned i = 0, E = SI.getNumOperands(); i != E; i+=2) {
+ for (unsigned i = 2, E = SI.getNumOperands(); i != E; i += 2) {
OL[i] = InOL[i];
OL[i+1] = InOL[i+1];
}
@@ -3018,7 +2981,7 @@ SwitchInst::SwitchInst(const SwitchInst &SI)
}
SwitchInst::~SwitchInst() {
- dropHungoffUses(OperandList);
+ dropHungoffUses();
}
@@ -3046,14 +3009,10 @@ void SwitchInst::removeCase(unsigned idx) {
unsigned NumOps = getNumOperands();
Use *OL = OperandList;
- // Move everything after this operand down.
- //
- // FIXME: we could just swap with the end of the list, then erase. However,
- // client might not expect this to happen. The code as it is thrashes the
- // use/def lists, which is kinda lame.
- for (unsigned i = (idx+1)*2; i != NumOps; i += 2) {
- OL[i-2] = OL[i];
- OL[i-2+1] = OL[i+1];
+ // Overwrite this case with the end of the list.
+ if ((idx + 1) * 2 != NumOps) {
+ OL[idx * 2] = OL[NumOps - 2];
+ OL[idx * 2 + 1] = OL[NumOps - 1];
}
// Nuke the last value.
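
removeCase no longer slides every later case down; it overwrites the removed pair with the last one and drops the tail, trading case order for constant-time updates to the use lists. The same idiom in plain C++ (a generic sketch, not LLVM code):

  #include <vector>

  // Remove vec[idx] in O(1) by overwriting it with the last element and
  // popping the tail. Element order is not preserved, matching the new
  // removeCase behavior.
  template <typename T>
  void removeUnordered(std::vector<T> &vec, size_t idx) {
    if (idx + 1 != vec.size())
      vec[idx] = vec.back();
    vec.pop_back();
  }
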
@@ -3089,7 +3048,7 @@ void SwitchInst::resizeOperands(unsigned NumOps) {
NewOps[i] = OldOps[i];
}
OperandList = NewOps;
- if (OldOps) Use::zap(OldOps, OldOps + e, true);
+ Use::zap(OldOps, OldOps + e, true);
}
@@ -3104,7 +3063,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
}
//===----------------------------------------------------------------------===//
-// SwitchInst Implementation
+// IndirectBrInst Implementation
//===----------------------------------------------------------------------===//
void IndirectBrInst::init(Value *Address, unsigned NumDests) {
@@ -3144,7 +3103,7 @@ void IndirectBrInst::resizeOperands(unsigned NumOps) {
for (unsigned i = 0; i != e; ++i)
NewOps[i] = OldOps[i];
OperandList = NewOps;
- if (OldOps) Use::zap(OldOps, OldOps + e, true);
+ Use::zap(OldOps, OldOps + e, true);
}
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
@@ -3172,7 +3131,7 @@ IndirectBrInst::IndirectBrInst(const IndirectBrInst &IBI)
}
IndirectBrInst::~IndirectBrInst() {
- dropHungoffUses(OperandList);
+ dropHungoffUses();
}
/// addDestination - Add a destination.
@@ -3346,8 +3305,7 @@ ReturnInst *ReturnInst::clone_impl() const {
}
BranchInst *BranchInst::clone_impl() const {
- unsigned Ops(getNumOperands());
- return new(Ops, Ops == 1) BranchInst(*this);
+ return new(getNumOperands()) BranchInst(*this);
}
SwitchInst *SwitchInst::clone_impl() const {
diff --git a/contrib/llvm/lib/VMCore/LLVMContext.cpp b/contrib/llvm/lib/VMCore/LLVMContext.cpp
index 563c651..1bd497d 100644
--- a/contrib/llvm/lib/VMCore/LLVMContext.cpp
+++ b/contrib/llvm/lib/VMCore/LLVMContext.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/SourceMgr.h"
#include "LLVMContextImpl.h"
+#include <cctype>
using namespace llvm;
static ManagedStatic<LLVMContext> GlobalContext;
@@ -28,25 +29,42 @@ LLVMContext& llvm::getGlobalContext() {
}
LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
- // Create the first metadata kind, which is always 'dbg'.
+ // Create the fixed metadata kinds. This is done in the same order as the
+ // MD_* enum values so that they correspond.
+
+ // Create the 'dbg' metadata kind.
unsigned DbgID = getMDKindID("dbg");
assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+
+ // Create the 'tbaa' metadata kind.
+ unsigned TBAAID = getMDKindID("tbaa");
+ assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
}
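
Because the constructor registers the fixed kinds in enum order, the string lookup and the MD_* constants remain interchangeable. A short sketch (the instruction and MDNode are assumed to exist; building the tbaa node itself is out of scope here):

  #include "llvm/LLVMContext.h"
  #include "llvm/Instructions.h"
  #include <cassert>
  using namespace llvm;

  // 'TBAATag' is an MDNode assumed to have been built elsewhere.
  void tagAccess(Instruction *Inst, MDNode *TBAATag, LLVMContext &Ctx) {
    unsigned TBAAKind = Ctx.getMDKindID("tbaa");
    assert(TBAAKind == LLVMContext::MD_tbaa && "kind ids stay in enum order");
    Inst->setMetadata(TBAAKind, TBAATag);             // by looked-up id
    Inst->setMetadata(LLVMContext::MD_tbaa, TBAATag); // or by the fixed enum
  }
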
LLVMContext::~LLVMContext() { delete pImpl; }
+void LLVMContext::addModule(Module *M) {
+ pImpl->OwnedModules.insert(M);
+}
+
+void LLVMContext::removeModule(Module *M) {
+ pImpl->OwnedModules.erase(M);
+}
+
//===----------------------------------------------------------------------===//
// Recoverable Backend Errors
//===----------------------------------------------------------------------===//
-void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler,
- void *DiagContext) {
+void LLVMContext::
+setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler,
+ void *DiagContext) {
pImpl->InlineAsmDiagHandler = DiagHandler;
pImpl->InlineAsmDiagContext = DiagContext;
}
/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by
/// setInlineAsmDiagnosticHandler.
-void *LLVMContext::getInlineAsmDiagnosticHandler() const {
+LLVMContext::InlineAsmDiagHandlerTy
+LLVMContext::getInlineAsmDiagnosticHandler() const {
return pImpl->InlineAsmDiagHandler;
}
@@ -76,13 +94,11 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
errs() << "error: " << ErrorStr << "\n";
exit(1);
}
-
+
// If we do have an error handler, we can report the error and keep going.
SMDiagnostic Diag("", "error: " + ErrorStr.str());
-
- ((SourceMgr::DiagHandlerTy)(intptr_t)pImpl->InlineAsmDiagHandler)
- (Diag, pImpl->InlineAsmDiagContext, LocCookie);
-
+
+ pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
}
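
With the handler stored under its real type, emitError can call it directly instead of casting a void pointer. A sketch of installing such a handler, assuming the handler type matches the call in emitError above (SMDiagnostic, opaque context, location cookie); the client-state names are purely illustrative:

  #include "llvm/LLVMContext.h"
  #include "llvm/Support/SourceMgr.h"
  using namespace llvm;

  // Hypothetical client state; only the names are illustrative.
  struct MyCompilerState {
    void reportInlineAsmError(const SMDiagnostic &Diag, unsigned LocCookie);
  };

  static void myAsmDiagHandler(const SMDiagnostic &Diag, void *Context,
                               unsigned LocCookie) {
    static_cast<MyCompilerState *>(Context)->reportInlineAsmError(Diag,
                                                                  LocCookie);
  }

  void installHandler(LLVMContext &Ctx, MyCompilerState *State) {
    Ctx.setInlineAsmDiagnosticHandler(myAsmDiagHandler, State);
  }
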
//===----------------------------------------------------------------------===//
@@ -94,13 +110,13 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
static bool isValidName(StringRef MDName) {
if (MDName.empty())
return false;
-
- if (!isalpha(MDName[0]))
+
+ if (!std::isalpha(MDName[0]))
return false;
-
+
for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
++I) {
- if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
+ if (!std::isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
return false;
}
return true;
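
isValidName spells out the grammar for custom metadata kind names: a leading alphabetic character, then alphanumerics plus '_', '-' and '.'. A few illustrative cases, assuming names handed to getMDKindID are expected to satisfy this grammar:

  // 'Ctx' is an LLVMContext assumed to be in scope.
  unsigned K = Ctx.getMDKindID("my.kind-2_ok"); // fine: letter, then [alnum _ - .]
  // Rejected shapes: "" (empty), "2fast" (leading digit),
  // "has space" or "oops!" (characters outside the allowed set).
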
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
index 93a075f..ccb8dc5 100644
--- a/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
+#include "llvm/Module.h"
#include <algorithm>
using namespace llvm;
@@ -25,6 +26,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
X86_FP80Ty(C, Type::X86_FP80TyID),
FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ X86_MMXTy(C, Type::X86_MMXTyID),
Int1Ty(C, 1),
Int8Ty(C, 8),
Int16Ty(C, 16),
@@ -51,6 +53,15 @@ struct DropReferences {
}
LLVMContextImpl::~LLVMContextImpl() {
+ // NOTE: We need to delete the contents of OwnedModules, but we have to
+ // duplicate it into a temporary vector, because the destructor of Module
+ // will try to remove itself from OwnedModules set. This would cause
+ // iterator invalidation if we iterated on the set directly.
+ std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
+ for (std::vector<Module*>::iterator I = Modules.begin(), E = Modules.end();
+ I != E; ++I)
+ delete *I;
+
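
Because ~Module now erases the module from OwnedModules, iterating the live set while deleting would invalidate the iterator, hence the snapshot into a temporary vector. The same defensive pattern in generic form (illustrative sketch only):

  #include <set>
  #include <vector>

  struct Widget;
  static std::set<Widget*> Registry;

  struct Widget {
    Widget()  { Registry.insert(this); }
    ~Widget() { Registry.erase(this); }   // removes itself, like ~Module()
  };

  void destroyAll() {
    // Snapshot first: each delete erases from Registry, so iterating the
    // live set directly would invalidate the iterator being advanced.
    std::vector<Widget*> Doomed(Registry.begin(), Registry.end());
    for (size_t i = 0, e = Doomed.size(); i != e; ++i)
      delete Doomed[i];
  }
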
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
DropReferences());
std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
@@ -90,7 +101,7 @@ LLVMContextImpl::~LLVMContextImpl() {
MDNodes.push_back(&*I);
}
MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
- for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(),
+ for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
E = MDNodes.end(); I != E; ++I) {
(*I)->destroy();
}
diff --git a/contrib/llvm/lib/VMCore/LLVMContextImpl.h b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
index 51b2992..23971aa 100644
--- a/contrib/llvm/lib/VMCore/LLVMContextImpl.h
+++ b/contrib/llvm/lib/VMCore/LLVMContextImpl.h
@@ -115,7 +115,12 @@ public:
class LLVMContextImpl {
public:
- void *InlineAsmDiagHandler, *InlineAsmDiagContext;
+ /// OwnedModules - The set of modules instantiated in this context, and which
+ /// will be automatically deleted if this context is deleted.
+ SmallPtrSet<Module*, 4> OwnedModules;
+
+ LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler;
+ void *InlineAsmDiagContext;
typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
DenseMapAPIntKeyInfo> IntMapTy;
@@ -170,6 +175,7 @@ public:
const Type X86_FP80Ty;
const Type FP128Ty;
const Type PPC_FP128Ty;
+ const Type X86_MMXTy;
const IntegerType Int1Ty;
const IntegerType Int8Ty;
const IntegerType Int16Ty;
diff --git a/contrib/llvm/lib/VMCore/LeakDetector.cpp b/contrib/llvm/lib/VMCore/LeakDetector.cpp
index a44f61d..f6651e9 100644
--- a/contrib/llvm/lib/VMCore/LeakDetector.cpp
+++ b/contrib/llvm/lib/VMCore/LeakDetector.cpp
@@ -16,8 +16,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Value.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/VMCore/Metadata.cpp b/contrib/llvm/lib/VMCore/Metadata.cpp
index da69c43..0b8e8df 100644
--- a/contrib/llvm/lib/VMCore/Metadata.cpp
+++ b/contrib/llvm/lib/VMCore/Metadata.cpp
@@ -339,17 +339,14 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// Now that the node is out of the folding set, get ready to reinsert it.
// First, check to see if another node with the same operands already exists
- // in the set. If it doesn't exist, this returns the position to insert it.
+ // in the set. If so, then this node is redundant.
FoldingSetNodeID ID;
Profile(ID);
void *InsertPoint;
- MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
-
- if (N) {
- N->replaceAllUsesWith(this);
- N->destroy();
- N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
- assert(N == 0 && "shouldn't be in the map now!"); (void)N;
+ if (MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)) {
+ replaceAllUsesWith(N);
+ destroy();
+ return;
}
// InsertPoint will have been set by the FindNodeOrInsertPos call.
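
The rewrite flips the direction of the merge: when an identical node already exists in the folding set, the node being edited forwards its uses to the existing one and destroys itself, instead of destroying the existing node. The underlying "find or insert" idiom, shown for a self-contained hypothetical node type (not the MDNode code itself):

  #include "llvm/ADT/FoldingSet.h"
  using namespace llvm;

  // Hypothetical uniqued node keyed on a single integer.
  struct MyNode : FoldingSetNode {
    int Key;
    explicit MyNode(int K) : Key(K) {}
    void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Key); }
  };

  // Return the canonical node for Key, creating it only if needed.
  MyNode *getOrCreate(FoldingSet<MyNode> &Set, int Key) {
    FoldingSetNodeID ID;
    ID.AddInteger(Key);
    void *InsertPos;
    if (MyNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
      return Existing;                    // duplicate: reuse it
    MyNode *N = new MyNode(Key);
    Set.InsertNode(N, InsertPos);         // no duplicate: insert at the hint
    return N;
  }
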
diff --git a/contrib/llvm/lib/VMCore/Module.cpp b/contrib/llvm/lib/VMCore/Module.cpp
index d7ddf96..341e527 100644
--- a/contrib/llvm/lib/VMCore/Module.cpp
+++ b/contrib/llvm/lib/VMCore/Module.cpp
@@ -62,9 +62,11 @@ Module::Module(StringRef MID, LLVMContext& C)
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
+ Context.addModule(this);
}
Module::~Module() {
+ Context.removeModule(this);
dropAllReferences();
GlobalList.clear();
FunctionList.clear();
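
Together with the OwnedModules set above, these two lines make the context aware of every module created in it, so destroying the context also destroys any modules that were never deleted explicitly. A small usage sketch (the module name is arbitrary):

  #include "llvm/LLVMContext.h"
  #include "llvm/Module.h"
  using namespace llvm;

  void contextOwnsModules() {
    LLVMContext *Ctx = new LLVMContext();
    Module *M = new Module("demo", *Ctx); // registers with Ctx via addModule
    (void)M;
    delete Ctx;  // ~LLVMContextImpl now deletes the leftover module as well
  }
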
diff --git a/contrib/llvm/lib/VMCore/Pass.cpp b/contrib/llvm/lib/VMCore/Pass.cpp
index a7d7f61..9afc540 100644
--- a/contrib/llvm/lib/VMCore/Pass.cpp
+++ b/contrib/llvm/lib/VMCore/Pass.cpp
@@ -213,7 +213,6 @@ RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
*this, isDefault);
}
-
//===----------------------------------------------------------------------===//
// PassRegistrationListener implementation
//
diff --git a/contrib/llvm/lib/VMCore/PassManager.cpp b/contrib/llvm/lib/VMCore/PassManager.cpp
index ab4d4e5..8bfef98 100644
--- a/contrib/llvm/lib/VMCore/PassManager.cpp
+++ b/contrib/llvm/lib/VMCore/PassManager.cpp
@@ -24,7 +24,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <algorithm>
#include <cstdio>
#include <map>
@@ -497,9 +497,14 @@ PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
}
/// Set pass P as the last user of the given analysis passes.
-void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
- Pass *P) {
- for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
+void
+PMTopLevelManager::setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses,
+ Pass *P) {
+ unsigned PDepth = 0;
+ if (P->getResolver())
+ PDepth = P->getResolver()->getPMDataManager().getDepth();
+
+ for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(),
E = AnalysisPasses.end(); I != E; ++I) {
Pass *AP = *I;
LastUser[AP] = P;
@@ -507,20 +512,47 @@ void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
if (P == AP)
continue;
+ // Update the last users of passes that are required transitive by AP.
+ AnalysisUsage *AnUsage = findAnalysisUsage(AP);
+ const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet();
+ SmallVector<Pass *, 12> LastUses;
+ SmallVector<Pass *, 12> LastPMUses;
+ for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
+ E = IDs.end(); I != E; ++I) {
+ Pass *AnalysisPass = findAnalysisPass(*I);
+ assert(AnalysisPass && "Expected analysis pass to exist.");
+ AnalysisResolver *AR = AnalysisPass->getResolver();
+ assert(AR && "Expected analysis resolver to exist.");
+ unsigned APDepth = AR->getPMDataManager().getDepth();
+
+ if (PDepth == APDepth)
+ LastUses.push_back(AnalysisPass);
+ else if (PDepth > APDepth)
+ LastPMUses.push_back(AnalysisPass);
+ }
+
+ setLastUser(LastUses, P);
+
+ // If this pass has a corresponding pass manager, push higher level
+ // analysis to this pass manager.
+ if (P->getResolver())
+ setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass());
+
+
// If AP is the last user of other passes then make P last user of
// such passes.
for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
LUE = LastUser.end(); LUI != LUE; ++LUI) {
if (LUI->second == AP)
// DenseMap iterator is not invalidated here because
- // this is just updating exisitng entry.
+ // this is just updating existing entries.
LastUser[LUI->first] = P;
}
}
}
/// Collect passes whose last user is P
-void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
+void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
Pass *P) {
DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
InversedLastUser.find(P);
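
Most of the signature changes in this file follow one idiom: take SmallVectorImpl<T>& instead of SmallVector<T, N>&, so callers may pass any inline capacity without N leaking into the interface. A compact sketch of the idiom (hypothetical names):

  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  // The callee does not care about the caller's inline capacity.
  static void collectInteresting(SmallVectorImpl<int> &Out) {
    Out.push_back(42);
  }

  void caller() {
    SmallVector<int, 4>  A;   // different inline sizes,
    SmallVector<int, 16> B;   // same callee
    collectInteresting(A);
    collectInteresting(B);
  }
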
@@ -612,41 +644,40 @@ void PMTopLevelManager::schedulePass(Pass *P) {
/// then return NULL.
Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
- Pass *P = NULL;
// Check pass managers
- for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
- E = PassManagers.end(); P == NULL && I != E; ++I) {
- PMDataManager *PMD = *I;
- P = PMD->findAnalysisPass(AID, false);
- }
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
+ E = PassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
// Check other pass managers
- for (SmallVector<PMDataManager *, 8>::iterator
+ for (SmallVectorImpl<PMDataManager *>::iterator
I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); P == NULL && I != E; ++I)
- P = (*I)->findAnalysisPass(AID, false);
-
- for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
- E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
+ E = IndirectPassManagers.end(); I != E; ++I)
+ if (Pass *P = (*I)->findAnalysisPass(AID, false))
+ return P;
+
+ // Check the immutable passes. Iterate in reverse order so that we find
+ // the most recently registered passes first.
+ for (SmallVector<ImmutablePass *, 8>::reverse_iterator I =
+ ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) {
AnalysisID PI = (*I)->getPassID();
if (PI == AID)
- P = *I;
+ return *I;
// If Pass not found then check the interfaces implemented by Immutable Pass
- if (!P) {
- const PassInfo *PassInf =
- PassRegistry::getPassRegistry()->getPassInfo(PI);
- const std::vector<const PassInfo*> &ImmPI =
- PassInf->getInterfacesImplemented();
- for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
- EE = ImmPI.end(); II != EE; ++II) {
- if ((*II)->getTypeInfo() == AID)
- P = *I;
- }
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
+ const std::vector<const PassInfo*> &ImmPI =
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ return *I;
}
}
- return P;
+ return 0;
}
// Print passes managed by this top level manager.
@@ -675,6 +706,12 @@ void PMTopLevelManager::dumpArguments() const {
return;
dbgs() << "Pass Arguments: ";
+ for (SmallVector<ImmutablePass *, 8>::const_iterator I =
+ ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
+ if (!PI->isAnalysisGroup())
+ dbgs() << " -" << PI->getPassArgument();
for (SmallVector<PMDataManager *, 8>::const_iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->dumpPassArguments();
@@ -682,12 +719,12 @@ void PMTopLevelManager::dumpArguments() const {
}
void PMTopLevelManager::initializeAllAnalysisInfo() {
- for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->initializeAnalysisInfo();
// Initialize other pass managers
// Initialize other pass managers
- for (SmallVector<PMDataManager *, 8>::iterator
+ for (SmallVectorImpl<PMDataManager *>::iterator
I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
I != E; ++I)
(*I)->initializeAnalysisInfo();
@@ -708,11 +745,11 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
/// Destructor
PMTopLevelManager::~PMTopLevelManager() {
- for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
+ for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
delete *I;
- for (SmallVector<ImmutablePass *, 8>::iterator
+ for (SmallVectorImpl<ImmutablePass *>::iterator
I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
delete *I;
@@ -749,7 +786,7 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
return true;
const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(),
+ for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
E = HigherLevelAnalysis.end(); I != E; ++I) {
Pass *P1 = *I;
if (P1->getAsImmutablePass() == 0 &&
@@ -849,7 +886,7 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
dbgs() << " Free these instances\n";
}
- for (SmallVector<Pass *, 12>::iterator I = DeadPasses.begin(),
+ for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
E = DeadPasses.end(); I != E; ++I)
freePass(*I, Msg, DBG_STR);
}
@@ -910,7 +947,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
collectRequiredAnalysis(RequiredPasses,
ReqAnalysisNotAvailable, P);
- for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
+ for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(),
E = RequiredPasses.end(); I != E; ++I) {
Pass *PRequired = *I;
unsigned RDepth = 0;
@@ -944,7 +981,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
}
// Now, take care of required analyses that are not available.
- for (SmallVector<AnalysisID, 8>::iterator
+ for (SmallVectorImpl<AnalysisID>::iterator
I = ReqAnalysisNotAvailable.begin(),
E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
@@ -965,8 +1002,8 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
/// Populate RP with analysis pass that are required by
/// pass P and are available. Populate RP_NotAvail with analysis
/// pass that are required by pass P but are not available.
-void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
- SmallVector<AnalysisID, 8> &RP_NotAvail,
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP,
+ SmallVectorImpl<AnalysisID> &RP_NotAvail,
Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
@@ -1038,7 +1075,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
TPM->collectLastUses(LUses, P);
- for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
+ for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(),
E = LUses.end(); I != E; ++I) {
llvm::dbgs() << "--" << std::string(Offset*2, ' ');
(*I)->dumpPassStructure(0);
@@ -1046,7 +1083,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
}
void PMDataManager::dumpPassArguments() const {
- for (SmallVector<Pass *, 8>::const_iterator I = PassVector.begin(),
+ for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(),
E = PassVector.end(); I != E; ++I) {
if (PMDataManager *PMD = (*I)->getAsPMDataManager())
PMD->dumpPassArguments();
@@ -1087,6 +1124,9 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
case ON_MODULE_MSG:
dbgs() << "' on Module '" << Msg << "'...\n";
break;
+ case ON_REGION_MSG:
+ dbgs() << "' on Region '" << Msg << "'...\n";
+ break;
case ON_LOOP_MSG:
dbgs() << "' on Loop '" << Msg << "'...\n";
break;
@@ -1163,7 +1203,7 @@ Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
// Destructor
PMDataManager::~PMDataManager() {
- for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
+ for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
E = PassVector.end(); I != E; ++I)
delete *I;
}
@@ -1563,7 +1603,7 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
FPP->add(RequiredPass);
// Register P as the last user of RequiredPass.
- SmallVector<Pass *, 12> LU;
+ SmallVector<Pass *, 1> LU;
LU.push_back(RequiredPass);
FPP->setLastUser(LU, P);
}
diff --git a/contrib/llvm/lib/VMCore/PassRegistry.cpp b/contrib/llvm/lib/VMCore/PassRegistry.cpp
index 21dba56..c97a170 100644
--- a/contrib/llvm/lib/VMCore/PassRegistry.cpp
+++ b/contrib/llvm/lib/VMCore/PassRegistry.cpp
@@ -16,93 +16,125 @@
#include "llvm/PassSupport.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <vector>
using namespace llvm;
-static PassRegistry *PassRegistryObj = 0;
-PassRegistry *PassRegistry::getPassRegistry() {
- // Use double-checked locking to safely initialize the registrar when
- // we're running in multithreaded mode.
- PassRegistry* tmp = PassRegistryObj;
- if (llvm_is_multithreaded()) {
- sys::MemoryFence();
- if (!tmp) {
- llvm_acquire_global_lock();
- tmp = PassRegistryObj;
- if (!tmp) {
- tmp = new PassRegistry();
- sys::MemoryFence();
- PassRegistryObj = tmp;
- }
- llvm_release_global_lock();
- }
- } else if (!tmp) {
- PassRegistryObj = new PassRegistry();
- }
-
- return PassRegistryObj;
-}
-
-namespace {
-
-// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
+// FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
// Unfortunately, passes are registered with static ctors, and having
// llvm_shutdown clear this map prevents successful resurrection after
// llvm_shutdown is run. Ideally we should find a solution so that we don't
// leak the map, AND can still resurrect after shutdown.
-void cleanupPassRegistry(void*) {
- if (PassRegistryObj) {
- delete PassRegistryObj;
- PassRegistryObj = 0;
- }
+static ManagedStatic<PassRegistry> PassRegistryObj;
+PassRegistry *PassRegistry::getPassRegistry() {
+ return &*PassRegistryObj;
}
-ManagedCleanup<&cleanupPassRegistry> registryCleanup ATTRIBUTE_USED;
+static ManagedStatic<sys::SmartMutex<true> > Lock;
+
+//===----------------------------------------------------------------------===//
+// PassRegistryImpl
+//
+
+namespace {
+struct PassRegistryImpl {
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef DenseMap<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ SmallPtrSet<const PassInfo *, 8> Implementations;
+ };
+ DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<const PassInfo*> ToFree;
+ std::vector<PassRegistrationListener*> Listeners;
+};
+} // end anonymous namespace
+
+void *PassRegistry::getImpl() const {
+ if (!pImpl)
+ pImpl = new PassRegistryImpl();
+ return pImpl;
+}
+
+//===----------------------------------------------------------------------===//
+// Accessors
+//
+
+PassRegistry::~PassRegistry() {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
+
+ for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
+ E = Impl->ToFree.end(); I != E; ++I)
+ delete *I;
+
+ delete Impl;
+ pImpl = 0;
}
const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::const_iterator I = PassInfoMap.find(TI);
- return I != PassInfoMap.end() ? I->second : 0;
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
+ return I != Impl->PassInfoMap.end() ? I->second : 0;
}
const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
- sys::SmartScopedLock<true> Guard(Lock);
- StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
- return I != PassInfoStringMap.end() ? I->second : 0;
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::StringMapType::const_iterator
+ I = Impl->PassInfoStringMap.find(Arg);
+ return I != Impl->PassInfoStringMap.end() ? I->second : 0;
}
//===----------------------------------------------------------------------===//
// Pass Registration mechanism
//
-void PassRegistry::registerPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
+void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
bool Inserted =
- PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
- assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
- PassInfoStringMap[PI.getPassArgument()] = &PI;
+ Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!");
+ (void)Inserted;
+ Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
// Notify any listeners.
for (std::vector<PassRegistrationListener*>::iterator
- I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
+ I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
(*I)->passRegistered(&PI);
+
+ if (ShouldFree) Impl->ToFree.push_back(&PI);
}
void PassRegistry::unregisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
- assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::MapType::iterator I =
+ Impl->PassInfoMap.find(PI.getTypeInfo());
+ assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
// Remove pass from the map.
- PassInfoMap.erase(I);
- PassInfoStringMap.erase(PI.getPassArgument());
+ Impl->PassInfoMap.erase(I);
+ Impl->PassInfoStringMap.erase(PI.getPassArgument());
}
void PassRegistry::enumerateWith(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(Lock);
- for (MapType::const_iterator I = PassInfoMap.begin(),
- E = PassInfoMap.end(); I != E; ++I)
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
+ E = Impl->PassInfoMap.end(); I != E; ++I)
L->passEnumerate(I->second);
}
@@ -111,7 +143,8 @@ void PassRegistry::enumerateWith(PassRegistrationListener *L) {
void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
const void *PassID,
PassInfo& Registeree,
- bool isDefault) {
+ bool isDefault,
+ bool ShouldFree) {
PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
if (InterfaceInfo == 0) {
// First reference to Interface, register it now.
@@ -126,12 +159,15 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
assert(ImplementationInfo &&
"Must register pass before adding to AnalysisGroup!");
+ sys::SmartScopedLock<true> Guard(*Lock);
+
// Make sure we keep track of the fact that the implementation implements
// the interface.
ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
- sys::SmartScopedLock<true> Guard(Lock);
- AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ PassRegistryImpl::AnalysisGroupInfo &AGI =
+ Impl->AnalysisGroupInfoMap[InterfaceInfo];
assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
"Cannot add a pass to the same analysis group more than once!");
AGI.Implementations.insert(ImplementationInfo);
@@ -143,17 +179,30 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
}
}
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ if (ShouldFree) Impl->ToFree.push_back(&Registeree);
}
void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(Lock);
- Listeners.push_back(L);
+ sys::SmartScopedLock<true> Guard(*Lock);
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ Impl->Listeners.push_back(L);
}
void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(Lock);
+ sys::SmartScopedLock<true> Guard(*Lock);
+
+ // NOTE: This is necessary, because removeRegistrationListener() can be called
+ // as part of the llvm_shutdown sequence. Since we have no control over the
+ // order of that sequence, we need to gracefully handle the case where the
+ // PassRegistry is destructed before the object that triggers this call.
+ if (!pImpl) return;
+
+ PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
std::vector<PassRegistrationListener*>::iterator I =
- std::find(Listeners.begin(), Listeners.end(), L);
- assert(I != Listeners.end() && "PassRegistrationListener not registered!");
- Listeners.erase(I);
+ std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
+ assert(I != Impl->Listeners.end() &&
+ "PassRegistrationListener not registered!");
+ Impl->Listeners.erase(I);
}
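
The registry singleton moves from hand-rolled double-checked locking to ManagedStatic, which constructs lazily in a thread-safe way and is torn down by llvm_shutdown(). A minimal sketch of the same pattern for an arbitrary type (names illustrative):

  #include "llvm/Support/ManagedStatic.h"
  using namespace llvm;

  struct Counters { unsigned Hits; Counters() : Hits(0) {} };

  // Constructed on first dereference, destroyed by llvm_shutdown().
  static ManagedStatic<Counters> GlobalCounters;

  void recordHit() {
    ++GlobalCounters->Hits;   // operator-> triggers lazy construction
  }
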
diff --git a/contrib/llvm/lib/VMCore/PrintModulePass.cpp b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
index 2ee49d2..1f1fbc9 100644
--- a/contrib/llvm/lib/VMCore/PrintModulePass.cpp
+++ b/contrib/llvm/lib/VMCore/PrintModulePass.cpp
@@ -78,10 +78,10 @@ namespace {
char PrintModulePass::ID = 0;
INITIALIZE_PASS(PrintModulePass, "print-module",
- "Print module to stderr", false, false);
+ "Print module to stderr", false, false)
char PrintFunctionPass::ID = 0;
INITIALIZE_PASS(PrintFunctionPass, "print-function",
- "Print function to stderr", false, false);
+ "Print function to stderr", false, false)
/// createPrintModulePass - Create and return a pass that writes the
/// module to the specified raw_ostream.
diff --git a/contrib/llvm/lib/VMCore/Type.cpp b/contrib/llvm/lib/VMCore/Type.cpp
index c55e626..be28ad1 100644
--- a/contrib/llvm/lib/VMCore/Type.cpp
+++ b/contrib/llvm/lib/VMCore/Type.cpp
@@ -27,7 +27,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Threading.h"
+#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cstdarg>
using namespace llvm;
@@ -109,6 +109,7 @@ const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
case PPC_FP128TyID : return getPPC_FP128Ty(C);
case LabelTyID : return getLabelTy(C);
case MetadataTyID : return getMetadataTy(C);
+ case X86_MMXTyID : return getX86_MMXTy(C);
default:
return 0;
}
@@ -172,10 +173,20 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
return false;
// Vector -> Vector conversions are always lossless if the two vector types
- // have the same size, otherwise not.
- if (const VectorType *thisPTy = dyn_cast<VectorType>(this))
+ // have the same size, otherwise not. Also, 64-bit vector types can be
+ // converted to x86mmx.
+ if (const VectorType *thisPTy = dyn_cast<VectorType>(this)) {
if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
return thisPTy->getBitWidth() == thatPTy->getBitWidth();
+ if (Ty->getTypeID() == Type::X86_MMXTyID &&
+ thisPTy->getBitWidth() == 64)
+ return true;
+ }
+
+ if (this->getTypeID() == Type::X86_MMXTyID)
+ if (const VectorType *thatPTy = dyn_cast<VectorType>(Ty))
+ if (thatPTy->getBitWidth() == 64)
+ return true;
// At this point we have only various mismatches of the first class types
// remaining and ptr->ptr. Just select the lossless conversions. Everything
@@ -192,6 +203,7 @@ unsigned Type::getPrimitiveSizeInBits() const {
case Type::X86_FP80TyID: return 80;
case Type::FP128TyID: return 128;
case Type::PPC_FP128TyID: return 128;
+ case Type::X86_MMXTyID: return 64;
case Type::IntegerTyID: return cast<IntegerType>(this)->getBitWidth();
case Type::VectorTyID: return cast<VectorType>(this)->getBitWidth();
default: return 0;
@@ -354,6 +366,10 @@ const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
return &C.pImpl->PPC_FP128Ty;
}
+const Type *Type::getX86_MMXTy(LLVMContext &C) {
+ return &C.pImpl->X86_MMXTy;
+}
+
const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
}
@@ -398,6 +414,10 @@ const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
return getPPC_FP128Ty(C)->getPointerTo(AS);
}
+const PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+ return getX86_MMXTy(C)->getPointerTo(AS);
+}
+
const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
return getIntNTy(C, N)->getPointerTo(AS);
}
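
The hunks above make x86_mmx a first-class 64-bit primitive that is losslessly bit-castable to and from 64-bit vectors, with the usual getX86_MMXTy / getX86_MMXPtrTy accessors. A short sketch querying those properties (illustrative only; assumes an LLVMContext in scope):

  #include "llvm/DerivedTypes.h"
  #include <cassert>
  using namespace llvm;

  void checkMMXProperties(LLVMContext &Ctx) {
    const Type *MMX = Type::getX86_MMXTy(Ctx);
    const VectorType *V8i8 = VectorType::get(Type::getInt8Ty(Ctx), 8); // 64 bits

    assert(MMX->getPrimitiveSizeInBits() == 64);
    assert(V8i8->canLosslesslyBitCastTo(MMX)); // 64-bit vector -> x86_mmx
    assert(MMX->canLosslesslyBitCastTo(V8i8)); // x86_mmx -> 64-bit vector
  }
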
@@ -1083,7 +1103,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
while (!AbstractTypeUsers.empty() && NewTy != this) {
AbstractTypeUser *User = AbstractTypeUsers.back();
- unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+ unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
#ifdef DEBUG_MERGE_TYPES
DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User
<< "] of abstract type [" << (void*)this << " "
@@ -1110,7 +1130,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
#endif
- unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
+ unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
while (!AbstractTypeUsers.empty()) {
AbstractTypeUser *ATU = AbstractTypeUsers.back();
ATU->typeBecameConcrete(this);
diff --git a/contrib/llvm/lib/VMCore/TypesContext.h b/contrib/llvm/lib/VMCore/TypesContext.h
index 5a90917..4694486 100644
--- a/contrib/llvm/lib/VMCore/TypesContext.h
+++ b/contrib/llvm/lib/VMCore/TypesContext.h
@@ -317,7 +317,7 @@ public:
// The old record is now out-of-date, because one of the children has been
// updated. Remove the obsolete entry from the map.
unsigned NumErased = Map.erase(ValType::get(Ty));
- assert(NumErased && "Element not found!"); NumErased = NumErased;
+ assert(NumErased && "Element not found!"); (void)NumErased;
// Remember the structural hash for the type before we start hacking on it,
// in case we need it later.
diff --git a/contrib/llvm/lib/VMCore/Use.cpp b/contrib/llvm/lib/VMCore/Use.cpp
index fec710b..2258b8d 100644
--- a/contrib/llvm/lib/VMCore/Use.cpp
+++ b/contrib/llvm/lib/VMCore/Use.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/User.h"
+#include "llvm/Value.h"
namespace llvm {
@@ -85,7 +85,8 @@ const Use *Use::getImpliedUser() const {
// Use initTags Implementation
//===----------------------------------------------------------------------===//
-Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+Use *Use::initTags(Use * const Start, Use *Stop) {
+ ptrdiff_t Done = 0;
while (Done < 20) {
if (Start == Stop--)
return Start;
@@ -97,20 +98,18 @@ Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
oneDigitTag, oneDigitTag, oneDigitTag,
oneDigitTag, stopTag
};
- Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(tags[Done++]));
- Stop->Val = 0;
+ new(Stop) Use(tags[Done++]);
}
ptrdiff_t Count = Done;
while (Start != Stop) {
--Stop;
- Stop->Val = 0;
if (!Count) {
- Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(stopTag));
+ new(Stop) Use(stopTag);
++Done;
Count = Done;
} else {
- Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Count & 1));
+ new(Stop) Use(PrevPtrTag(Count & 1));
Count >>= 1;
++Done;
}
@@ -124,123 +123,24 @@ Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
//===----------------------------------------------------------------------===//
void Use::zap(Use *Start, const Use *Stop, bool del) {
- if (del) {
- while (Start != Stop) {
- (--Stop)->~Use();
- }
+ while (Start != Stop)
+ (--Stop)->~Use();
+ if (del)
::operator delete(Start);
- return;
- }
-
- while (Start != Stop) {
- (Start++)->set(0);
- }
}
//===----------------------------------------------------------------------===//
-// AugmentedUse layout struct
-//===----------------------------------------------------------------------===//
-
-struct AugmentedUse : public Use {
- PointerIntPair<User*, 1, Tag> ref;
- AugmentedUse(); // not implemented
-};
-
-
-//===----------------------------------------------------------------------===//
// Use getUser Implementation
//===----------------------------------------------------------------------===//
User *Use::getUser() const {
const Use *End = getImpliedUser();
- const PointerIntPair<User*, 1, Tag>& ref(
- static_cast<const AugmentedUse*>(End - 1)->ref);
+ const PointerIntPair<User*, 1, unsigned>&
+ ref(static_cast<const AugmentedUse*>(End - 1)->ref);
User *She = ref.getPointer();
return ref.getInt()
? She
: (User*)End;
}
-//===----------------------------------------------------------------------===//
-// User allocHungoffUses Implementation
-//===----------------------------------------------------------------------===//
-
-Use *User::allocHungoffUses(unsigned N) const {
- Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
- + sizeof(AugmentedUse)
- - sizeof(Use)));
- Use *End = Begin + N;
- PointerIntPair<User*, 1, Tag>& ref(static_cast<AugmentedUse&>(End[-1]).ref);
- ref.setPointer(const_cast<User*>(this));
- ref.setInt(tagOne);
- return Use::initTags(Begin, End);
-}
-
-//===----------------------------------------------------------------------===//
-// User operator new Implementations
-//===----------------------------------------------------------------------===//
-
-void *User::operator new(size_t s, unsigned Us) {
- void *Storage = ::operator new(s + sizeof(Use) * Us);
- Use *Start = static_cast<Use*>(Storage);
- Use *End = Start + Us;
- User *Obj = reinterpret_cast<User*>(End);
- Obj->OperandList = Start;
- Obj->NumOperands = Us;
- Use::initTags(Start, End);
- return Obj;
-}
-
-/// Prefixed allocation - just before the first Use, allocate a NULL pointer.
-/// The destructor can detect its presence and readjust the OperandList
-/// for deletition.
-///
-void *User::operator new(size_t s, unsigned Us, bool Prefix) {
- // currently prefixed allocation only admissible for
- // unconditional branch instructions
- if (!Prefix)
- return operator new(s, Us);
-
- assert(Us == 1 && "Other than one Use allocated?");
- typedef PointerIntPair<void*, 2, Use::PrevPtrTag> TaggedPrefix;
- void *Raw = ::operator new(s + sizeof(TaggedPrefix) + sizeof(Use) * Us);
- TaggedPrefix *Pre = static_cast<TaggedPrefix*>(Raw);
- Pre->setFromOpaqueValue(0);
- void *Storage = Pre + 1; // skip over prefix
- Use *Start = static_cast<Use*>(Storage);
- Use *End = Start + Us;
- User *Obj = reinterpret_cast<User*>(End);
- Obj->OperandList = Start;
- Obj->NumOperands = Us;
- Use::initTags(Start, End);
- return Obj;
-}
-
-//===----------------------------------------------------------------------===//
-// User operator delete Implementation
-//===----------------------------------------------------------------------===//
-
-void User::operator delete(void *Usr) {
- User *Start = static_cast<User*>(Usr);
- Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
- //
- // look for a variadic User
- if (Storage == Start->OperandList) {
- ::operator delete(Storage);
- return;
- }
- //
- // check for the flag whether the destructor has detected a prefixed
- // allocation, in which case we remove the flag and delete starting
- // at OperandList
- if (reinterpret_cast<intptr_t>(Start->OperandList) & 1) {
- ::operator delete(reinterpret_cast<char*>(Start->OperandList) - 1);
- return;
- }
- //
- // in all other cases just delete the nullary User (covers hung-off
- // uses also
- ::operator delete(Usr);
-}
-
} // End llvm namespace
diff --git a/contrib/llvm/lib/VMCore/User.cpp b/contrib/llvm/lib/VMCore/User.cpp
new file mode 100644
index 0000000..2f4587d
--- /dev/null
+++ b/contrib/llvm/lib/VMCore/User.cpp
@@ -0,0 +1,81 @@
+//===-- User.cpp - Implement the User class -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constant.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/User.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// User Class
+//===----------------------------------------------------------------------===//
+
+// replaceUsesOfWith - Replaces all references to the "From" definition with
+// references to the "To" definition.
+//
+void User::replaceUsesOfWith(Value *From, Value *To) {
+ if (From == To) return; // Duh what?
+
+ assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
+ "Cannot call User::replaceUsesOfWith on a constant!");
+
+ for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
+ if (getOperand(i) == From) { // Is This operand is pointing to oldval?
+ // The side effects of this setOperand call include linking to
+ // "To", adding "this" to the uses list of To, and
+ // most importantly, removing "this" from the use list of "From".
+ setOperand(i, To); // Fix it now...
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// User allocHungoffUses Implementation
+//===----------------------------------------------------------------------===//
+
+Use *User::allocHungoffUses(unsigned N) const {
+ Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
+ + sizeof(AugmentedUse)
+ - sizeof(Use)));
+ Use *End = Begin + N;
+ PointerIntPair<User*, 1, unsigned>&
+ ref(static_cast<AugmentedUse&>(End[-1]).ref);
+ ref.setPointer(const_cast<User*>(this));
+ ref.setInt(1);
+ return Use::initTags(Begin, End);
+}
+
+//===----------------------------------------------------------------------===//
+// User operator new Implementations
+//===----------------------------------------------------------------------===//
+
+void *User::operator new(size_t s, unsigned Us) {
+ void *Storage = ::operator new(s + sizeof(Use) * Us);
+ Use *Start = static_cast<Use*>(Storage);
+ Use *End = Start + Us;
+ User *Obj = reinterpret_cast<User*>(End);
+ Obj->OperandList = Start;
+ Obj->NumOperands = Us;
+ Use::initTags(Start, End);
+ return Obj;
+}
+
+//===----------------------------------------------------------------------===//
+// User operator delete Implementation
+//===----------------------------------------------------------------------===//
+
+void User::operator delete(void *Usr) {
+ User *Start = static_cast<User*>(Usr);
+ Use *Storage = static_cast<Use*>(Usr) - Start->NumOperands;
+ // If there were hung-off uses, they will have been freed already and
+ // NumOperands reset to 0, so here we just free the User itself.
+ ::operator delete(Storage);
+}
+
+} // End llvm namespace
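
replaceUsesOfWith moves here unchanged from Value.cpp; it rewrites only the operands of one user, unlike Value::replaceAllUsesWith, which rewrites every user of a value. A short usage sketch (hypothetical values):

  #include "llvm/Instructions.h"
  using namespace llvm;

  // Rewire a single user: every operand of I that refers to OldV is pointed
  // at NewV; other users of OldV are left untouched.
  void redirectOneUser(Instruction *I, Value *OldV, Value *NewV) {
    I->replaceUsesOfWith(OldV, NewV);
  }
  // Contrast: OldV->replaceAllUsesWith(NewV) would rewrite every user of OldV.
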
diff --git a/contrib/llvm/lib/VMCore/Value.cpp b/contrib/llvm/lib/VMCore/Value.cpp
index b8c6775..29f6a80 100644
--- a/contrib/llvm/lib/VMCore/Value.cpp
+++ b/contrib/llvm/lib/VMCore/Value.cpp
@@ -22,6 +22,7 @@
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
@@ -254,7 +255,7 @@ void Value::takeName(Value *V) {
// Get V's ST, this should always succeed, because V has a name.
ValueSymbolTable *VST;
bool Failure = getSymTab(V, VST);
- assert(!Failure && "V has a name, so it should have a ST!"); Failure=Failure;
+ assert(!Failure && "V has a name, so it should have a ST!"); (void)Failure;
// If these values are both in the same symtab, we can do this very fast.
// This works even if both values have no symtab yet.
@@ -345,25 +346,62 @@ Value *Value::stripPointerCasts() {
return V;
}
-Value *Value::getUnderlyingObject(unsigned MaxLookup) {
- if (!getType()->isPointerTy())
- return this;
- Value *V = this;
- for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
- return V;
- V = GA->getAliasee();
- } else {
- return V;
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+ // It's also not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. Some cases could
+ // be handled using TargetData to check sizes and alignments though.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(this)) return true;
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are ok.
+ if (const Argument *A = dyn_cast<Argument>(this))
+ return A->hasByValAttr();
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!GEP->getOperand(0)->isDereferenceablePointer())
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ const Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
}
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ // Indices check out; this is dereferenceable.
+ return true;
}
- return V;
+
+ // If we don't know, assume the worst.
+ return false;
}
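
The new predicate is deliberately conservative: it accepts allocas, globals that are not extern_weak, byval arguments, and constant in-bounds GEPs over such bases, and rejects everything that may be null or out of bounds. A sketch of one plausible client use (hypothetical helper name):

  #include "llvm/Instructions.h"
  using namespace llvm;

  // A client could use the predicate to decide whether a load may be
  // speculated (hoisted past a branch) without risking a trap.
  bool canSpeculateLoad(const LoadInst *LI) {
    return LI->getPointerOperand()->isDereferenceablePointer();
  }
  // Rejected examples include malloc results (may be null) and GEPs with
  // unknown or out-of-range indices.
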
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
@@ -600,26 +638,3 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
/// more than once.
CallbackVH::~CallbackVH() {}
-
-
-//===----------------------------------------------------------------------===//
-// User Class
-//===----------------------------------------------------------------------===//
-
-// replaceUsesOfWith - Replaces all references to the "From" definition with
-// references to the "To" definition.
-//
-void User::replaceUsesOfWith(Value *From, Value *To) {
- if (From == To) return; // Duh what?
-
- assert((!isa<Constant>(this) || isa<GlobalValue>(this)) &&
- "Cannot call User::replaceUsesOfWith on a constant!");
-
- for (unsigned i = 0, E = getNumOperands(); i != E; ++i)
- if (getOperand(i) == From) { // Is This operand is pointing to oldval?
- // The side effects of this setOperand call include linking to
- // "To", adding "this" to the uses list of To, and
- // most importantly, removing "this" from the use list of "From".
- setOperand(i, To); // Fix it now...
- }
-}
diff --git a/contrib/llvm/lib/VMCore/ValueTypes.cpp b/contrib/llvm/lib/VMCore/ValueTypes.cpp
index d2a8ce3..c054ae4 100644
--- a/contrib/llvm/lib/VMCore/ValueTypes.cpp
+++ b/contrib/llvm/lib/VMCore/ValueTypes.cpp
@@ -109,7 +109,8 @@ std::string EVT::getEVTString() const {
case MVT::ppcf128: return "ppcf128";
case MVT::isVoid: return "isVoid";
case MVT::Other: return "ch";
- case MVT::Flag: return "flag";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
case MVT::v2i8: return "v2i8";
case MVT::v4i8: return "v4i8";
case MVT::v8i8: return "v8i8";
@@ -155,6 +156,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f80: return Type::getX86_FP80Ty(Context);
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
+ case MVT::x86mmx: return Type::getX86_MMXTy(Context);
case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
@@ -196,6 +198,7 @@ EVT EVT::getEVT(const Type *Ty, bool HandleUnknown){
case Type::FloatTyID: return MVT(MVT::f32);
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
+ case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
case Type::PointerTyID: return MVT(MVT::iPTR);
diff --git a/contrib/llvm/lib/VMCore/Verifier.cpp b/contrib/llvm/lib/VMCore/Verifier.cpp
index e3ecc97..58ec6fe 100644
--- a/contrib/llvm/lib/VMCore/Verifier.cpp
+++ b/contrib/llvm/lib/VMCore/Verifier.cpp
@@ -72,7 +72,9 @@ namespace { // Anonymous namespace for class
struct PreVerifier : public FunctionPass {
static char ID; // Pass ID, replacement for typeid
- PreVerifier() : FunctionPass(ID) { }
+ PreVerifier() : FunctionPass(ID) {
+ initializePreVerifierPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -103,8 +105,8 @@ namespace { // Anonymous namespace for class
char PreVerifier::ID = 0;
INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
- false, false);
-char &PreVerifyID = PreVerifier::ID;
+ false, false)
+static char &PreVerifyID = PreVerifier::ID;
namespace {
class TypeSet : public AbstractTypeUser {
@@ -184,11 +186,15 @@ namespace {
Verifier()
: FunctionPass(ID),
Broken(false), RealPass(true), action(AbortProcessAction),
- Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
+ Mod(0), Context(0), DT(0), MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
explicit Verifier(VerifierFailureAction ctn)
: FunctionPass(ID),
Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
- MessagesStr(Messages) {}
+ MessagesStr(Messages) {
+ initializeVerifierPass(*PassRegistry::getPassRegistry());
+ }
bool doInitialization(Module &M) {
Mod = &M;
@@ -393,7 +399,10 @@ namespace {
} // End anonymous namespace
char Verifier::ID = 0;
-INITIALIZE_PASS(Verifier, "verify", "Module Verifier", false, false);
+INITIALIZE_PASS_BEGIN(Verifier, "verify", "Module Verifier", false, false)
+INITIALIZE_PASS_DEPENDENCY(PreVerifier)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(Verifier, "verify", "Module Verifier", false, false)
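
The verifier now declares its initialization dependencies with the BEGIN/DEPENDENCY/END macros and calls initializeVerifierPass from its constructors, the registration style this change adopts throughout. A sketch of the same boilerplate for a hypothetical pass (the pass name and its generated initialize function are placeholders, not real LLVM passes):

  #include "llvm/Pass.h"
  #include "llvm/InitializePasses.h"
  using namespace llvm;

  namespace llvm { void initializeMyCheckerPassPass(PassRegistry &); }

  namespace {
    // Hypothetical pass, present only to show the registration boilerplate.
    struct MyCheckerPass : public FunctionPass {
      static char ID;
      MyCheckerPass() : FunctionPass(ID) {
        initializeMyCheckerPassPass(*PassRegistry::getPassRegistry());
      }
      virtual bool runOnFunction(Function &) { return false; }
    };
  }

  char MyCheckerPass::ID = 0;
  INITIALIZE_PASS_BEGIN(MyCheckerPass, "my-checker", "Demo checker", false, false)
  INITIALIZE_PASS_DEPENDENCY(DominatorTree)
  INITIALIZE_PASS_END(MyCheckerPass, "my-checker", "Demo checker", false, false)
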
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -475,6 +484,7 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
"Aliasee cannot be NULL!", &GA);
Assert1(GA.getType() == GA.getAliasee()->getType(),
"Alias and aliasee types should match!", &GA);
+ Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
if (!isa<GlobalValue>(GA.getAliasee())) {
const ConstantExpr *CE = dyn_cast<ConstantExpr>(GA.getAliasee());
@@ -685,6 +695,8 @@ void Verifier::visitFunction(Function &F) {
case CallingConv::Cold:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
+ case CallingConv::PTX_Kernel:
+ case CallingConv::PTX_Device:
Assert1(!F.isVarArg(),
"Varargs functions must have C calling conventions!", &F);
break;
@@ -1643,10 +1655,14 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
- Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
- "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
+ Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
Assert1(isa<Constant>(CI.getArgOperand(1)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
+ if (!AI->getType()->getElementType()->isPointerTy()) {
+ Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.", &CI);
+ }
}
Assert1(CI.getParent()->getParent()->hasGC(),